Intro

In this script, I load exchange data from DATRAS and extract CA data (i.e. biological data), which is used for the condition model. In theory I could have joined the CPUE data to the CA data to get all important covariates and variables (oxygen, depth, ICES information, densities of cod and flounder). However, the CPUE data has been standardized with respect to trawl speed, gear dimension, sweep length and trawl duration. Because many haul IDs did not have this information, the CPUE data has fewer IDs. In order not to lose condition data simply because I lack haul-level CPUE of cod and flounder, I will instead repeat the data cleaning process here, fit models to cod and flounder CPUE, and predict at the locations of the condition data.

Load libraries

# NOTE(review): removed `rm(list = ls())` -- it only clears the global
# environment (not packages, options or the search path) and silently wipes
# a collaborator's workspace. Restart R for a clean session instead.

library(tidyverse); theme_set(theme_light(base_size = 12))
library(readxl)
library(tidylog)
library(RCurl)
library(viridis)
library(RColorBrewer)
library(patchwork)   # loaded once (the original loaded it twice)
library(janitor)
library(icesDatras)
library(mapdata)
library(rgdal)       # NOTE(review): rgdal/rgeos are retired; consider sf/terra
library(raster)
library(sf)
library(rgeos)
library(chron)
library(lattice)
library(ncdf4)

# Install sdmTMB from GitHub only when it is missing, instead of
# re-installing unconditionally on every run as the original did.
if (!requireNamespace("sdmTMB", quietly = TRUE)) {
  remotes::install_github("pbs-assess/sdmTMB")
}
library(sdmTMB)

library(marmap)
library(rnaturalearth)
library(rnaturalearthdata)
library(mapplots)
library(geosphere)

# Medium-resolution world coastline, used as a basemap in later plots
world <- ne_countries(scale = "medium", returnclass = "sf")

For maps

# Specify map plotting extent (WGS84 degrees).
# Use `<-` for assignment per the tidyverse style guide (`=` is for arguments).
ymin <- 54
ymax <- 58
xmin <- 12
xmax <- 22

# European coastlines for the study-area basemap
map_data <- rnaturalearth::ne_countries(
  scale = "medium",
  returnclass = "sf", continent = "europe")

# Crop the polygon for plotting and efficiency:
# st_bbox(map_data) # find the rough coordinates
swe_coast <- suppressWarnings(suppressMessages(
  st_crop(map_data,
          c(xmin = xmin, ymin = ymin, xmax = xmax, ymax = ymax))))

# Transform the map into UTM zone 33N (EPSG:32633), the projection the
# spatial models are fitted in
utm_zone33 <- 32633
swe_coast_proj <- sf::st_transform(swe_coast, crs = utm_zone33)

#ggplot(swe_coast_proj) + geom_sf()

Read data

# Read haul (HH) exchange data for the Baltic International Trawl Survey.
# The getDATRAS()/write.csv() lines were run once to cache the download;
# subsequent runs read the local CSV copy.
# bits_hh <- getDATRAS(record = "HH", survey = "BITS", years = 1991:2020, quarters = 4)
# write.csv(bits_hh, "data/DATRAS_exchange/bits_hh.csv")
bits_hh <- read.csv("data/DATRAS_exchange/bits_hh.csv")

# Read biological (CA) records, cached the same way
# bits_ca <- getDATRAS(record = "CA", survey = "BITS", years = 1991:2020, quarters = 4)
# write.csv(bits_ca, "data/DATRAS_exchange/bits_ca.csv")
bits_ca <- read.csv("data/DATRAS_exchange/bits_ca.csv")

Standardize catch data

Standardize ships

# Before creating a new ID, make sure that countries and ship names use the
# same format across the exchange files
sort(unique(bits_hh$Ship))
#>  [1] "06S1" "06SL" "26D4" "26HF" "26HI" "67BC" "77AR" "77SE" "AA36" "ESLF"
#> [11] "ESTM" "LTDA" "RUJB" "RUNT"

# Change back to the old ship-name standard:
# https://vocab.ices.dk/?ref=315
# Assumptions:
# SOL is Solea on the ICES link above, and SOL1 is the older of the two SOLs
# DAN is Dana
# Exploration snippets used to settle the mappings:
# bits_hh %>% filter(Ship == "67BC") %>% distinct(Year, Country)
# bits_hh %>% filter(Ship == "26D4") %>% distinct(Year) # Strange that 26DF doesn't extend far back. Which ship did the Danes use? Ok, I have no Danish data that old.
# bits_hh %>% filter(Country == "DK") %>% distinct(Year)

# BUG FIX: the original recode listed "67BC" twice ("HAF" = "67BC" followed
# by "BAL" = "67BC"); fct_recode keeps the last mapping, so "BAL" is what
# actually took effect (and what the Latvia disambiguation below relies on).
# Keep only that single, explicit mapping. The no-op self-recodes
# ("77SE" = "77SE", "AA36" = "AA36") are dropped: levels without a mapping
# are left unchanged anyway.
bits_hh <- bits_hh %>%
  mutate(Ship2 = fct_recode(Ship,
                            "SOL" = "06S1",
                            "SOL2" = "06SL",
                            "DAN2" = "26D4",
                            "HAF" = "26HF",
                            "HAF" = "26HI",
                            "BAL" = "67BC",
                            "ARG" = "77AR",
                            "KOOT" = "ESLF",
                            "KOH" = "ESTM",
                            "DAR" = "LTDA",
                            "ATLD" = "RUJB",
                            "ATL" = "RUNT"),
         Ship2 = as.character(Ship2)) %>%
  # More than one country used "BAL"; give the Latvian vessel its own code
  mutate(Ship3 = ifelse(Country == "LV" & Ship2 == "BAL", "BALL", Ship2))

# Apply the same ship recode to the CA (biological) records so the two
# exchange files can later be matched on a common ID.
# BUG FIX: same duplicated "67BC" mapping as in the HH recode above --
# fct_recode kept only the last pair ("BAL" = "67BC"), so that is the single
# mapping retained here; the dead "HAF" = "67BC" line and the no-op
# self-recodes are removed.
bits_ca <- bits_ca %>%
  mutate(Ship2 = fct_recode(Ship,
                            "SOL" = "06S1",
                            "SOL2" = "06SL",
                            "DAN2" = "26D4",
                            "HAF" = "26HF",
                            "HAF" = "26HI",
                            "BAL" = "67BC",
                            "ARG" = "77AR",
                            "KOOT" = "ESLF",
                            "KOH" = "ESTM",
                            "DAR" = "LTDA",
                            "ATLD" = "RUJB",
                            "ATL" = "RUNT"),
         Ship2 = as.character(Ship2)) %>%
  # Same Latvia disambiguation as for the HH data
  mutate(Ship3 = ifelse(Country == "LV" & Ship2 == "BAL", "BALL", Ship2))

Standardize countries

# Inspect which country codes appear in the haul data
sort(unique(bits_hh$Country))
#> [1] "DE" "DK" "EE" "LT" "LV" "PL" "RU" "SE"

# Recode the two-letter codes to the three-letter codes used elsewhere
# (https://www.nationsonline.org/oneworld/country_code_list.htm#E)
bits_hh <- bits_hh %>%
  mutate(
    Country = as.character(fct_recode(
      Country,
      "DEN" = "DK", "EST" = "EE", "GFR" = "DE", "LAT" = "LV",
      "LTU" = "LT", "POL" = "PL", "RUS" = "RU", "SWE" = "SE"
    ))
  )

# Apply the identical country recode to the CA records
bits_ca <- bits_ca %>%
  mutate(
    Country = as.character(fct_recode(
      Country,
      "DEN" = "DK", "EST" = "EE", "GFR" = "DE", "LAT" = "LV",
      "LTU" = "LT", "POL" = "PL", "RUS" = "RU", "SWE" = "SE"
    ))
  )

# Gear codes: check what is present (no recoding needed)
sort(unique(bits_hh$Gear))
#>  [1] "ESB" "EXP" "FOT" "GOV" "GRT" "H20" "LBT" "PEL" "SON" "TVL" "TVS"

Create a simple ID that works across all exchange data

# Build a simple haul identifier common to all exchange record types:
# seven fields, dot-separated, constructed identically in HH and CA.
bits_hh <- bits_hh %>%
  mutate(
    IDx = paste(Year, Quarter, Country, Ship, Gear, StNo, HaulNo, sep = ".")
  )

bits_ca <- bits_ca %>%
  mutate(
    IDx = paste(Year, Quarter, Country, Ship, Gear, StNo, HaulNo, sep = ".")
  )

# Sanity check that IDx behaves like a haul id (one row per IDx in HH):
# bits_hh %>% group_by(IDx) %>% mutate(n = n()) %>% ungroup() %>% distinct(n)

Create the same unique haul-ID in the cpue data that I have in the sweep-file

# Recreate the unique haul-ID used in the sweep file: colon-separated and
# built from the standardized ship name (Ship3), so the data sets can be
# joined later on the same key.
bits_hh <- bits_hh %>% 
  mutate(haul.id = paste(Year, Quarter, Country, Ship3, Gear, StNo, HaulNo, sep = ":")) 

Clean DATRAS EXCHANGE data

# Keep only usable hauls: valid, additional and "no oxygen" hauls
# (HaulVal codes A, N, V per the original author's note)
bits_hh <- bits_hh %>%
  filter(HaulVal %in% c("A","N","V"))

# Add the ICES statistical rectangle from the shooting position
bits_hh$Rect <- mapplots::ices.rect2(lon = bits_hh$ShootLong, lat = bits_hh$ShootLat)

# Add ICES subdivisions via point-in-polygon overlay on the StatRec shapefile.
# NOTE(review): shapefile()/SpatialPoints()/over() come from the retired
# sp/rgdal stack; consider migrating to sf::st_read() + sf::st_join().
shape <- shapefile("data/ICES_StatRec_mapto_ICES_Areas/StatRec_map_Areas_Full_20170124.shp")

# Points must share the shapefile's CRS for over() to be meaningful
pts <- SpatialPoints(cbind(bits_hh$ShootLong, bits_hh$ShootLat), 
                     proj4string = CRS(proj4string(shape)))
#> Warning in proj4string(shape): CRS object has comment, which is lost in output

# Area_27 holds the ICES area code (e.g. "3.d.25") for each haul position
bits_hh$SD <- over(pts, shape)$Area_27

# Rename subdivisions to the more common names and do some more filtering (by sub div and area)
sort(unique(bits_hh$SD))
#>  [1] "3.a.20"   "3.a.21"   "3.b.23"   "3.c.22"   "3.d.24"   "3.d.25"  
#>  [7] "3.d.26"   "3.d.27"   "3.d.28.1" "3.d.28.2" "3.d.29"

# Map the long ICES area codes to short Baltic subdivision numbers.
# The "3.d.30" pair is speculative: that area does not occur in this data
# (see the listing above), which is why fct_recode warns about an unknown
# level below -- the warning is expected and harmless.
bits_hh <- bits_hh %>% 
  mutate(SD = factor(SD),
         SD = fct_recode(SD,
                         "20" = "3.a.20",
                         "21" = "3.a.21",
                         "22" = "3.c.22",
                         "23" = "3.b.23",
                         "24" = "3.d.24",
                         "25" = "3.d.25",
                         "26" = "3.d.26",
                         "27" = "3.d.27",
                         "28" = "3.d.28.1",
                         "28" = "3.d.28.2",
                         "29" = "3.d.29",
                         "30" = "3.d.30"),
         SD = as.character(SD)) 
#> Warning: Problem with `mutate()` input `SD`.
#> ℹ Unknown levels in `f`: 3.d.30
#> ℹ Input `SD` is `fct_recode(...)`.
#> Warning: Unknown levels in `f`: 3.d.30

# Verify the recode and list available HH columns before choosing which
# ones to carry over to the CA data
sort(unique(bits_hh$SD))
#>  [1] "20" "21" "22" "23" "24" "25" "26" "27" "28" "29"
sort(colnames(bits_hh))
#>  [1] "BotCurDir"         "BotCurSpeed"       "BotSal"           
#>  [4] "BotTemp"           "Buoyancy"          "BySpecRecCode"    
#>  [7] "CodendMesh"        "Country"           "DataType"         
#> [10] "DateofCalculation" "Day"               "DayNight"         
#> [13] "Depth"             "DepthStratum"      "Distance"         
#> [16] "DoorSpread"        "DoorSurface"       "DoorType"         
#> [19] "DoorWgt"           "Gear"              "GearEx"           
#> [22] "GroundSpeed"       "haul.id"           "HaulDur"          
#> [25] "HaulLat"           "HaulLong"          "HaulNo"           
#> [28] "HaulVal"           "HydroStNo"         "IDx"              
#> [31] "KiteDim"           "MaxTrawlDepth"     "MinTrawlDepth"    
#> [34] "Month"             "Netopening"        "PelSampType"      
#> [37] "Quarter"           "RecordType"        "Rect"             
#> [40] "Rigging"           "SD"                "SecchiDepth"      
#> [43] "Ship"              "Ship2"             "Ship3"            
#> [46] "ShootLat"          "ShootLong"         "SpeedWater"       
#> [49] "StatRec"           "StdSpecRecCode"    "StNo"             
#> [52] "SurCurDir"         "SurCurSpeed"       "SurSal"           
#> [55] "SurTemp"           "Survey"            "SweepLngt"        
#> [58] "SwellDir"          "SwellHeight"       "ThClineDepth"     
#> [61] "ThermoCline"       "Tickler"           "TidePhase"        
#> [64] "TideSpeed"         "TimeShot"          "TowDir"           
#> [67] "Turbidity"         "WarpDen"           "Warpdia"          
#> [70] "Warplngt"          "WgtGroundRope"     "WindDir"          
#> [73] "WindSpeed"         "WingSpread"        "X"                
#> [76] "Year"
# Attach haul-level context (subdivision, rectangle, validity and species
# recording flags, shooting position) to the CA records via the shared IDx key
bits_hh_merge <- bits_hh %>%
  dplyr::select(
    SD, Rect, HaulVal, StdSpecRecCode, BySpecRecCode,
    IDx, ShootLat, ShootLong
  )

bits_ca <- left_join(bits_ca, bits_hh_merge, by = "IDx")

# Restrict both data sets to the Baltic subdivisions of interest
bits_hh <- bits_hh %>% filter(SD %in% c(24, 25, 26, 27, 28))
bits_ca <- bits_ca %>% filter(SD %in% c(24, 25, 26, 27, 28))

Now clean the CA data and reshape it so that one row corresponds to one observation (one individual fish)

# Keep cod records only. Both SpecCodes are labeled "Cod" here -- presumably
# the two values are the same species under different coding systems
# (NOTE(review): confirm against the SpecCodeType column). Drop the
# incomplete final year.
bits_ca <- bits_ca %>% 
  filter(SpecCode %in% c("164712", "126436") & Year < 2020) %>% 
  mutate(Species = "Cod")

# Expand to one row per individual fish: CANoAtLngt is the count per length
# class, so repeat each row that many times. tidyr::uncount() is the
# purpose-built replacement for the original `map_df(., rep, .$CANoAtLngt)`
# trick; .remove = FALSE keeps the count column so downstream code sees the
# same columns as before.
# Development checks from the original:
# nrow(bits_ca)
# test_id <- head(filter(bits_ca, CANoAtLngt == 5))$ID[1]
# filter(bits_ca, ID == test_id & CANoAtLngt == 5)
bits_ca <- bits_ca %>% uncount(CANoAtLngt, .remove = FALSE)

# head(data.frame(filter(bits_ca, ID == test_id & CANoAtLngt == 5)), 20)
# nrow(bits_ca)
# Looks ok!

# Standardize length and drop NA weights (both are needed for condition).
# NOTE(review): only LngtCode "." (millimetre classes) is converted to cm;
# per the ICES vocabulary (ref=18) other codes exist -- confirm no other
# mm-scaled codes occur in this subset.
bits_ca <- bits_ca %>% 
  drop_na(IndWgt) %>% 
  drop_na(LngtClass) %>% 
  filter(IndWgt > 0 & LngtClass > 0) %>%  # Filter positive length and weight
  mutate(weight_g = IndWgt) %>% 
  mutate(length_cm = ifelse(LngtCode == ".", 
                            LngtClass/10,
                            LngtClass)) # Standardize length (https://vocab.ices.dk/?ref=18)

# Plot weight against length by year to eyeball outliers
ggplot(bits_ca, aes(IndWgt, length_cm)) +
  geom_point() + 
  facet_wrap(~Year)

# Remove one apparent outlier (an implausibly heavy individual in 2010).
# A direct negated filter replaces the original temporary keep-column dance
# (mutate keep -> filter -> deselect); the result is identical, including
# for any NA rows (both forms drop them).
bits_ca <- bits_ca %>%
  filter(!(Year == 2010 & IndWgt > 12500))

# Re-plot to confirm the outlier is gone
ggplot(bits_ca, aes(IndWgt, length_cm)) +
  geom_point() + 
  facet_wrap(~Year)


# Harmonize column names with the rest of the analysis and keep only the
# variables needed downstream
dat <- bits_ca %>%
  rename(year = Year,
         lat = ShootLat,
         lon = ShootLong,
         quarter = Quarter,
         ices_rect = Rect) %>%
  dplyr::select(weight_g, length_cm, year, lat, lon, quarter, IDx, ices_rect, SD)

# Row count at the time of writing:
# > nrow(dat)
# [1] 98863

Add in the environmental variables

Depth

# Read the two depth tiles and merge them into a single raster
west <- raster("data/depth_geo_tif/D5_2018_rgb-1.tif")
#plot(west)

east <- raster("data/depth_geo_tif/D6_2018_rgb-1.tif")
# plot(east)

dep_rast <- raster::merge(west, east)

# Extract raster values at the haul positions. Use named columns instead of
# the original positional `dat[, 5:4]`, which silently breaks if the column
# order in `dat` ever changes.
dat$depth <- extract(dep_rast, dat[, c("lon", "lat")])

# Convert to depth (instead of elevation)
ggplot(dat, aes(depth)) + geom_histogram()
#> `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# NOTE(review): this shifts by the maximum extracted value before negating,
# i.e. depth is relative to the shallowest extracted point rather than a
# plain sign flip -- confirm the offset is intended.
# `max(..., na.rm = TRUE)` replaces the original `max(drop_na(dat)$depth)`
# (whole-data-frame drop_na); the tidylog output showed no rows removed, so
# the value is identical and the intent clearer.
dat$depth <- (dat$depth - max(dat$depth, na.rm = TRUE)) * -1
ggplot(dat, aes(depth)) + geom_histogram()
#> `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

Oxygen

# Oxygen: Copernicus Baltic Sea reanalysis (SCOBI model), monthly means.
# Downloaded from here: https://resources.marine.copernicus.eu/?option=com_csw&view=details&product_id=BALTICSEA_REANALYSIS_BIO_003_012
# Extract raster points: https://gisday.wordpress.com/2014/03/24/extract-raster-values-from-points-using-r/comment-page-1/
# https://rpubs.com/boyerag/297592
# https://pjbartlein.github.io/REarthSysSci/netCDF.html#get-a-variable
# Open the netCDF file; the print below documents its structure (a single
# variable o2b = sea-floor dissolved oxygen, mmol m-3)
ncin <- nc_open("data/NEMO_Nordic_SCOBI/dataset-reanalysis-scobi-monthlymeans_1610091357600.nc")

print(ncin)
#> File data/NEMO_Nordic_SCOBI/dataset-reanalysis-scobi-monthlymeans_1610091357600.nc (NC_FORMAT_CLASSIC):
#> 
#>      1 variables (excluding dimension variables):
#>         float o2b[longitude,latitude,time]   
#>             long_name: Sea_floor_Dissolved_Oxygen_Concentration
#>             missing_value: NaN
#>             standard_name: mole_concentration_of_dissolved_molecular_oxygen_in_sea_water
#>             units: mmol m-3
#>             _FillValue: NaN
#>             _ChunkSizes: 1
#>              _ChunkSizes: 523
#>              _ChunkSizes: 383
#> 
#>      3 dimensions:
#>         time  Size:324
#>             axis: T
#>             long_name: Validity time
#>             standard_name: time
#>             units: days since 1950-01-01 00:00:00
#>             calendar: gregorian
#>             _ChunkSizes: 512
#>             _CoordinateAxisType: Time
#>             valid_min: 15721.5
#>             valid_max: 25551.5
#>         latitude  Size:166
#>             axis: Y
#>             standard_name: latitude
#>             long_name: latitude
#>             units: degrees_north
#>             _CoordinateAxisType: Lat
#>             valid_min: 52.991626739502
#>             valid_max: 58.4915390014648
#>         longitude  Size:253
#>             standard_name: longitude
#>             long_name: longitude
#>             units: degrees_east
#>             axis: X
#>             _CoordinateAxisType: Lon
#>             valid_min: 9.01375484466553
#>             valid_max: 23.013614654541
#> 
#>     24 global attributes:
#>         references: http://www.smhi.se
#>         institution: Swedish Meterological and Hydrological Institute
#>         history: See source and creation_date attributees
#>         Conventions: CF-1.5
#>         contact: servicedesk_cmems@mercator-ocean.eu
#>         comment: Provided by SMHI as a Copernicus Marine Environment Monitoring Service production unit
#>         bullentin_type: reanalysis
#>         cmems_product_id: BALTICSEA_REANALYSIS_BIO_003_012
#>         title: CMEMS V4 Reanalysis: SCOBI model 3D fields (monthly means)
#>         FROM_ORIGINAL_FILE__easternmost_longitude: 30.2357654571533
#>         FROM_ORIGINAL_FILE__northernmost_latitude: 65.8914184570312
#>         FROM_ORIGINAL_FILE__westernmost_longitude: 9.01375484466553
#>         FROM_ORIGINAL_FILE__southernmost_latitude: 48.49169921875
#>         shallowest_depth: 1.50136542320251
#>         deepest_depth: 711.059204101562
#>         source: SMHI reanalysis run NORDIC-NS2_1d_20191201_20191202
#>         file_quality_index: 1
#>         creation_date: 2020-11-16 UTC
#>         bullentin_date: 20191201
#>         start_date: 2019-12-01 UTC
#>         stop_date: 2019-12-01 UTC
#>         start_time: 00:00 UTC
#>         stop_time: 00:00 UTC
#>         _CoordSysBuilder: ucar.nc2.dataset.conv.CF1Convention

# Pull the coordinate vectors from the netCDF file
lon <- ncvar_get(ncin,"longitude")
nlon <- dim(lon)
head(lon)
#> [1] 9.013755 9.069310 9.124865 9.180420 9.235975 9.291530

lat <- ncvar_get(ncin,"latitude")
nlat <- dim(lat)
head(lat)
#> [1] 52.99163 53.02496 53.05829 53.09163 53.12496 53.15829

# Get the time axis: days since 1950-01-01, one value per monthly mean
time <- ncvar_get(ncin,"time")
time
#>   [1] 15721.5 15751.0 15780.5 15811.0 15841.5 15872.0 15902.5 15933.5 15964.0
#>  [10] 15994.5 16025.0 16055.5 16086.5 16116.0 16145.5 16176.0 16206.5 16237.0
#>  [19] 16267.5 16298.5 16329.0 16359.5 16390.0 16420.5 16451.5 16481.0 16510.5
#>  [28] 16541.0 16571.5 16602.0 16632.5 16663.5 16694.0 16724.5 16755.0 16785.5
#>  [37] 16816.5 16846.5 16876.5 16907.0 16937.5 16968.0 16998.5 17029.5 17060.0
#>  [46] 17090.5 17121.0 17151.5 17182.5 17212.0 17241.5 17272.0 17302.5 17333.0
#>  [55] 17363.5 17394.5 17425.0 17455.5 17486.0 17516.5 17547.5 17577.0 17606.5
#>  [64] 17637.0 17667.5 17698.0 17728.5 17759.5 17790.0 17820.5 17851.0 17881.5
#>  [73] 17912.5 17942.0 17971.5 18002.0 18032.5 18063.0 18093.5 18124.5 18155.0
#>  [82] 18185.5 18216.0 18246.5 18277.5 18307.5 18337.5 18368.0 18398.5 18429.0
#>  [91] 18459.5 18490.5 18521.0 18551.5 18582.0 18612.5 18643.5 18673.0 18702.5
#> [100] 18733.0 18763.5 18794.0 18824.5 18855.5 18886.0 18916.5 18947.0 18977.5
#> [109] 19008.5 19038.0 19067.5 19098.0 19128.5 19159.0 19189.5 19220.5 19251.0
#> [118] 19281.5 19312.0 19342.5 19373.5 19403.0 19432.5 19463.0 19493.5 19524.0
#> [127] 19554.5 19585.5 19616.0 19646.5 19677.0 19707.5 19738.5 19768.5 19798.5
#> [136] 19829.0 19859.5 19890.0 19920.5 19951.5 19982.0 20012.5 20043.0 20073.5
#> [145] 20104.5 20134.0 20163.5 20194.0 20224.5 20255.0 20285.5 20316.5 20347.0
#> [154] 20377.5 20408.0 20438.5 20469.5 20499.0 20528.5 20559.0 20589.5 20620.0
#> [163] 20650.5 20681.5 20712.0 20742.5 20773.0 20803.5 20834.5 20864.0 20893.5
#> [172] 20924.0 20954.5 20985.0 21015.5 21046.5 21077.0 21107.5 21138.0 21168.5
#> [181] 21199.5 21229.5 21259.5 21290.0 21320.5 21351.0 21381.5 21412.5 21443.0
#> [190] 21473.5 21504.0 21534.5 21565.5 21595.0 21624.5 21655.0 21685.5 21716.0
#> [199] 21746.5 21777.5 21808.0 21838.5 21869.0 21899.5 21930.5 21960.0 21989.5
#> [208] 22020.0 22050.5 22081.0 22111.5 22142.5 22173.0 22203.5 22234.0 22264.5
#> [217] 22295.5 22325.0 22354.5 22385.0 22415.5 22446.0 22476.5 22507.5 22538.0
#> [226] 22568.5 22599.0 22629.5 22660.5 22690.5 22720.5 22751.0 22781.5 22812.0
#> [235] 22842.5 22873.5 22904.0 22934.5 22965.0 22995.5 23026.5 23056.0 23085.5
#> [244] 23116.0 23146.5 23177.0 23207.5 23238.5 23269.0 23299.5 23330.0 23360.5
#> [253] 23391.5 23421.0 23450.5 23481.0 23511.5 23542.0 23572.5 23603.5 23634.0
#> [262] 23664.5 23695.0 23725.5 23756.5 23786.0 23815.5 23846.0 23876.5 23907.0
#> [271] 23937.5 23968.5 23999.0 24029.5 24060.0 24090.5 24121.5 24151.5 24181.5
#> [280] 24212.0 24242.5 24273.0 24303.5 24334.5 24365.0 24395.5 24426.0 24456.5
#> [289] 24487.5 24517.0 24546.5 24577.0 24607.5 24638.0 24668.5 24699.5 24730.0
#> [298] 24760.5 24791.0 24821.5 24852.5 24882.0 24911.5 24942.0 24972.5 25003.0
#> [307] 25033.5 25064.5 25095.0 25125.5 25156.0 25186.5 25217.5 25247.0 25276.5
#> [316] 25307.0 25337.5 25368.0 25398.5 25429.5 25460.0 25490.5 25521.0 25551.5

# Time units string and number of time steps (324 = 27 years x 12 months)
tunits <- ncatt_get(ncin,"time","units")
nt <- dim(time)
nt
#> [1] 324
tunits
#> $hasatt
#> [1] TRUE
#> 
#> $value
#> [1] "days since 1950-01-01 00:00:00"

# Get the oxygen variable: o2b = sea-floor dissolved oxygen (mmol m-3)
dname <- "o2b"

oxy_array <- ncvar_get(ncin,dname)
dlname <- ncatt_get(ncin,dname,"long_name")
dunits <- ncatt_get(ncin,dname,"units")
fillvalue <- ncatt_get(ncin,dname,"_FillValue")   # NaN per the file header
dim(oxy_array)
#> [1] 253 166 324

# Get global attributes (kept only for provenance/reference)
title <- ncatt_get(ncin,0,"title")
institution <- ncatt_get(ncin,0,"institution")
datasource <- ncatt_get(ncin,0,"source")
references <- ncatt_get(ncin,0,"references")
history <- ncatt_get(ncin,0,"history")
Conventions <- ncatt_get(ncin,0,"Conventions")

# Convert time: split the units string ("days since 1950-01-01 00:00:00")
# into its origin date fields
tustr <- strsplit(tunits$value, " ")
tdstr <- strsplit(unlist(tustr)[3], "-")
tmonth <- as.integer(unlist(tdstr)[2])
tday <- as.integer(unlist(tdstr)[3])
tyear <- as.integer(unlist(tdstr)[1])

# Deviating from the guide a little bit: build chron dates from the offsets
dates <- chron(time, origin = c(tmonth, tday, tyear))

# Extract month and two-digit year from the chron print format.
# NOTE(review): two-digit-year pivot -- values > 90 map to 19xx, otherwise
# 20xx. Fine for the 1993-2019 series, but revisit if the data extend.
months <- as.numeric(substr(dates, 2, 3))
years <- as.numeric(substr(dates, 8, 9))
years <- ifelse(years > 90, 1900 + years, 2000 + years)

# Replace netCDF fill values with NA.
# BUG FIX: the fill value in this file is NaN (see print(ncin)), and
# `x == NaN` is never TRUE (it yields NA, treated as no-assignment here),
# so the original `oxy_array[oxy_array == fillvalue$value] <- NA` silently
# replaced nothing. Handle the NaN case explicitly; keep the equality test
# for ordinary numeric fill values. (Downstream na.rm/drop_na treat NaN and
# NA alike, so results are unchanged; the intent is now actually executed.)
if (is.nan(fillvalue$value)) {
  oxy_array[is.nan(oxy_array)] <- NA
} else {
  oxy_array[oxy_array == fillvalue$value] <- NA
}

# We only use Quarter 4 in this analysis, so keep only months 10-12.
# First inspect the month sequence (12 months repeating across 27 years):
months
#>   [1]  1  2  3  4  5  6  7  8  9 10 11 12  1  2  3  4  5  6  7  8  9 10 11 12  1
#>  [26]  2  3  4  5  6  7  8  9 10 11 12  1  2  3  4  5  6  7  8  9 10 11 12  1  2
#>  [51]  3  4  5  6  7  8  9 10 11 12  1  2  3  4  5  6  7  8  9 10 11 12  1  2  3
#>  [76]  4  5  6  7  8  9 10 11 12  1  2  3  4  5  6  7  8  9 10 11 12  1  2  3  4
#> [101]  5  6  7  8  9 10 11 12  1  2  3  4  5  6  7  8  9 10 11 12  1  2  3  4  5
#> [126]  6  7  8  9 10 11 12  1  2  3  4  5  6  7  8  9 10 11 12  1  2  3  4  5  6
#> [151]  7  8  9 10 11 12  1  2  3  4  5  6  7  8  9 10 11 12  1  2  3  4  5  6  7
#> [176]  8  9 10 11 12  1  2  3  4  5  6  7  8  9 10 11 12  1  2  3  4  5  6  7  8
#> [201]  9 10 11 12  1  2  3  4  5  6  7  8  9 10 11 12  1  2  3  4  5  6  7  8  9
#> [226] 10 11 12  1  2  3  4  5  6  7  8  9 10 11 12  1  2  3  4  5  6  7  8  9 10
#> [251] 11 12  1  2  3  4  5  6  7  8  9 10 11 12  1  2  3  4  5  6  7  8  9 10 11
#> [276] 12  1  2  3  4  5  6  7  8  9 10 11 12  1  2  3  4  5  6  7  8  9 10 11 12
#> [301]  1  2  3  4  5  6  7  8  9 10 11 12  1  2  3  4  5  6  7  8  9 10 11 12

# Indices of the October-December time steps
index_keep <- which(months > 9)

# Subset the oxygen array (and the month/year vectors) to Q4 slices only
oxy_q4 <- oxy_array[, , index_keep]

months_keep <- months[index_keep]

years_keep <- years[index_keep]

# Now we have an array with only Q4 data; average the three Q4 months
# (Oct, Nov, Dec) within each year. The third dimension of oxy_q4 holds
# consecutive Oct/Nov/Dec slices per year, so step through it three at a time.
loop_seq <- seq(1, dim(oxy_q4)[3], by = 3)

# Preallocate the result list (one averaged matrix per year). The original
# grew `dlist` and also pre-created oxy_10/oxy_11/oxy_12/oxy_ave as empty
# vectors that were simply overwritten each iteration.
dlist <- vector("list", length(loop_seq))

for (j in seq_along(loop_seq)) {
  i <- loop_seq[j]

  # Mean of the three consecutive monthly slices (element-wise)
  oxy_ave <- (oxy_q4[, , i] + oxy_q4[, , i + 1] + oxy_q4[, , i + 2]) / 3

  # j runs 1..n_years directly, replacing the original index arithmetic
  # `((i/3) - (1/3)) + 1`
  dlist[[j]] <- oxy_ave
}

# Name each list element with its year
names(dlist) <- unique(years_keep)

# Next: extract the annual oxygen raster value at each condition-data
# position. The condition data is called `dat` at this point in the script.

# Keep only the years for which an oxygen field exists
d_sub_oxy <- dat %>%
  filter(year %in% names(dlist)) %>%
  droplevels()
#> filter: removed 1,305 rows (1%), 97,558 rows remaining

# Holders for the per-year extracted data and per-year raster data frames
data_list <- list()
raster_list <- list()

# Factor version of year, used to index the lists inside the loop
d_sub_oxy$year_f <- as.factor(d_sub_oxy$year)

# Loop over years: rasterize that year's Q4 oxygen field, extract values at
# the haul positions, and save one diagnostic map per year.
for(i in unique(d_sub_oxy$year_f)) {
  
  # Set plot limits (degrees)
  ymin <- 54; ymax <- 58; xmin <- 12; xmax <- 22

  # This year's averaged Q4 oxygen matrix
  oxy_slice <- dlist[[i]]
  
  # Create a WGS84 raster for that year (i); the matrix is [lon, lat] so it
  # is transposed, and then flipped because the netCDF latitude axis runs
  # south-to-north while raster expects north-to-south
  r <- raster(t(oxy_slice), xmn = min(lon), xmx = max(lon), ymn = min(lat), ymx = max(lat),
              crs = CRS("+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs+ towgs84=0,0,0"))
  
  r <- flip(r, direction = 'y')
  
  #plot(r, main = i)
  
  # Haul positions for this year (coordinates only)
  d_slice <- d_sub_oxy %>% filter(year_f == i) %>% dplyr::select(lon, lat)
  
  # Make into a SpatialPoints object and extract the oxygen value per point.
  # (The original also built `df <- as.data.frame(data_sp)` here; it was
  # never used, so that dead assignment has been removed.)
  data_sp <- SpatialPoints(d_slice)
  
  rasValue <- raster::extract(r, data_sp)
  
  # Attach the extracted oxygen and the year to the point data
  d_slice$oxy <- rasValue
  d_slice$year <- i
  
  # List index 1..n(years): look the year up among the (sorted) factor
  # levels instead of the original hard-coded `as.numeric(year) - 1992`,
  # which silently breaks if the first year is not 1993. (Identical result
  # for the 1993-2019 series.)
  index <- match(as.character(i), levels(d_sub_oxy$year_f))
  
  # Store this year's extracted data
  data_list[[index]] <- d_slice
  
  # Convert the raster to points so the diagnostic map can be drawn in ggplot
  map.p <- rasterToPoints(r)
  
  df_rast <- data.frame(map.p)
  
  # Rename the value column and tag the year
  df_rast <- df_rast %>% rename("oxy" = "layer") %>% mutate(year = i)
  
  # Store this year's raster data frame
  raster_list[[index]] <- df_rast
  
  # Column headings used by the map below
  colnames(df_rast) <- c("Longitude", "Latitude", "oxy")
  
  # Diagnostic map: oxygen raster with haul points on top, sharing one scale
  ggplot(data = df_rast, aes(y = Latitude, x = Longitude)) +
    geom_raster(aes(fill = oxy)) +
    geom_point(data = d_slice, aes(x = lon, y = lat, fill = oxy),
               color = "black", size = 5, shape = 21) +
    theme_bw() +
    geom_sf(data = world, inherit.aes = F, size = 0.2) +
    coord_sf(xlim = c(xmin, xmax),
             ylim = c(ymin, ymax)) +
    scale_colour_gradientn(colours = rev(terrain.colors(10)),
                           limits = c(-200, 400)) +
    scale_fill_gradientn(colours = rev(terrain.colors(10)),
                         limits = c(-200, 400)) +
    NULL
  
  ggsave(paste("figures/supp/cpue_oxygen_rasters/", i,".png", sep = ""),
         width = 6.5, height = 6.5, dpi = 600)
  
}
#> filter: removed 96,361 rows (99%), 1,197 rows remaining
#> rename: renamed one variable (oxy)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> filter: removed 95,731 rows (98%), 1,827 rows remaining
#> rename: renamed one variable (oxy)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> filter: removed 95,528 rows (98%), 2,030 rows remaining
#> rename: renamed one variable (oxy)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> filter: removed 95,874 rows (98%), 1,684 rows remaining
#> rename: renamed one variable (oxy)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> filter: removed 95,471 rows (98%), 2,087 rows remaining
#> rename: renamed one variable (oxy)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> filter: removed 95,757 rows (98%), 1,801 rows remaining
#> rename: renamed one variable (oxy)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> filter: removed 95,653 rows (98%), 1,905 rows remaining
#> rename: renamed one variable (oxy)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> filter: removed 94,414 rows (97%), 3,144 rows remaining
#> rename: renamed one variable (oxy)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> filter: removed 95,525 rows (98%), 2,033 rows remaining
#> rename: renamed one variable (oxy)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> filter: removed 95,631 rows (98%), 1,927 rows remaining
#> rename: renamed one variable (oxy)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> filter: removed 94,149 rows (97%), 3,409 rows remaining
#> rename: renamed one variable (oxy)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> filter: removed 92,678 rows (95%), 4,880 rows remaining
#> rename: renamed one variable (oxy)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> filter: removed 91,720 rows (94%), 5,838 rows remaining
#> rename: renamed one variable (oxy)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> filter: removed 91,896 rows (94%), 5,662 rows remaining
#> rename: renamed one variable (oxy)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> filter: removed 90,570 rows (93%), 6,988 rows remaining
#> rename: renamed one variable (oxy)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> filter: removed 91,253 rows (94%), 6,305 rows remaining
#> rename: renamed one variable (oxy)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> filter: removed 93,495 rows (96%), 4,063 rows remaining
#> rename: renamed one variable (oxy)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> filter: removed 92,789 rows (95%), 4,769 rows remaining
#> rename: renamed one variable (oxy)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> filter: removed 93,475 rows (96%), 4,083 rows remaining
#> rename: renamed one variable (oxy)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> filter: removed 94,686 rows (97%), 2,872 rows remaining
#> rename: renamed one variable (oxy)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> filter: removed 94,286 rows (97%), 3,272 rows remaining
#> rename: renamed one variable (oxy)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> filter: removed 93,349 rows (96%), 4,209 rows remaining
#> rename: renamed one variable (oxy)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> filter: removed 93,298 rows (96%), 4,260 rows remaining
#> rename: renamed one variable (oxy)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> filter: removed 91,379 rows (94%), 6,179 rows remaining
#> rename: renamed one variable (oxy)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> filter: removed 92,746 rows (95%), 4,812 rows remaining
#> rename: renamed one variable (oxy)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> filter: removed 94,270 rows (97%), 3,288 rows remaining
#> rename: renamed one variable (oxy)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> filter: removed 94,524 rows (97%), 3,034 rows remaining
#> rename: renamed one variable (oxy)
#> mutate: new variable 'year' (character) with one unique value and 0% NA

# Combine the per-year lists into single data frames
big_dat_oxy <- dplyr::bind_rows(data_list)
big_raster_dat_oxy <- dplyr::bind_rows(raster_list)

# Annual mean bottom oxygen across the raster: the plot shows big
# inter-annual variation but a negative trend over time
big_raster_dat_oxy %>%
  group_by(year) %>%
  drop_na(oxy) %>%
  summarise(mean_oxy = mean(oxy)) %>%
  mutate(year_num = as.numeric(year)) %>%
  ggplot(., aes(year_num, mean_oxy)) +
  geom_point(size = 2) +
  stat_smooth(method = "lm") +
  NULL
#> group_by: one grouping variable (year)
#> drop_na (grouped): no rows removed
#> summarise: now 27 rows and 2 columns, ungrouped
#> mutate: new variable 'year_num' (double) with 27 unique values and 0% NA
#> `geom_smooth()` using formula 'y ~ x'


# Number of raster cells per year with negative ("dead") oxygen.
# NOTE(review): `mutate(n = n())` keeps one row per cell, so the plot draws
# the same (year, n) point once per remaining row -- points overlap exactly.
# A per-year summarise/count would give the same figure with one row per year.
big_raster_dat_oxy %>%
  group_by(year) %>%
  drop_na(oxy) %>%
  mutate(dead = ifelse(oxy < 0, "Y", "N")) %>%
  filter(dead == "Y") %>%
  mutate(n = n(),
         year_num = as.numeric(year)) %>%
  ggplot(., aes(year_num, n)) +
  geom_point(size = 2) +
  stat_smooth(method = "lm") +
  NULL
#> group_by: one grouping variable (year)
#> drop_na (grouped): no rows removed
#> mutate (grouped): new variable 'dead' (character) with 2 unique values and 0% NA
#> filter (grouped): removed 437,238 rows (93%), 31,671 rows remaining
#> mutate (grouped): new variable 'n' (integer) with 27 unique values and 0% NA
#>                   new variable 'year_num' (double) with 27 unique values and 0% NA
#> `geom_smooth()` using formula 'y ~ x'


# Now add in the new oxygen column in the original data:
str(d_sub_oxy)
#> tibble [97,558 × 11] (S3: tbl_df/tbl/data.frame)
#>  $ weight_g : num [1:97558] 1176 1187 1199 1219 1433 ...
#>  $ length_cm: num [1:97558] 51 51 51 51 53 55 58 61 8 34 ...
#>  $ year     : int [1:97558] 1993 1993 1993 1993 1993 1993 1993 1993 1993 1993 ...
#>  $ lat      : num [1:97558] 54.7 54.7 54.7 54.7 54.7 ...
#>  $ lon      : num [1:97558] 14.1 14.1 14.1 14.1 14.1 ...
#>  $ quarter  : int [1:97558] 4 4 4 4 4 4 4 4 4 4 ...
#>  $ IDx      : chr [1:97558] "1993.4.GFR.06S1.H20.28.41" "1993.4.GFR.06S1.H20.28.41" "1993.4.GFR.06S1.H20.28.41" "1993.4.GFR.06S1.H20.28.41" ...
#>  $ ices_rect: chr [1:97558] "38G4" "38G4" "38G4" "38G4" ...
#>  $ SD       : chr [1:97558] "24" "24" "24" "24" ...
#>  $ depth    : num [1:97558] 18 18 18 18 18 18 18 18 10 10 ...
#>  $ year_f   : Factor w/ 27 levels "1993","1994",..: 1 1 1 1 1 1 1 1 1 1 ...
str(big_dat_oxy)
#> tibble [97,558 × 4] (S3: tbl_df/tbl/data.frame)
#>  $ lon : num [1:97558] 14.1 14.1 14.1 14.1 14.1 ...
#>  $ lat : num [1:97558] 54.7 54.7 54.7 54.7 54.7 ...
#>  $ oxy : num [1:97558] 327 327 327 327 327 ...
#>  $ year: chr [1:97558] "1993" "1993" "1993" "1993" ...

# Build a year_lon_lat key for matching the raster-derived oxygen values
# to the cpue data
dat <- dat %>% mutate(id_oxy = paste(year, lon, lat, sep = "_"))
big_dat_oxy <- big_dat_oxy %>% mutate(id_oxy = paste(year, lon, lat, sep = "_"))

# Which id's in the cpue data (dat) have no oxygen match?
ids <- dat$id_oxy[!dat$id_oxy %in% big_dat_oxy$id_oxy]

unique(ids)
#>  [1] "1991_14.0333_54.85"   "1991_14.1167_54.6667" "1991_13.7_54.75"     
#>  [4] "1991_13.85_54.7833"   "1991_13.8_54.65"      "1991_13.6667_54.6833"
#>  [7] "1991_13.5667_54.7167" "1991_13.45_54.75"     "1991_13.8_54.5167"   
#> [10] "1991_14.15_54.5167"   "1991_13.2435_54.9697" "1991_13.1167_54.95"  
#> [13] "1991_13.6833_54.9833" "1991_13.95_54.9333"   "1991_14.0833_54.5833"
#> [16] "1991_14.25_54.6"      "1991_13.4833_55.0167" "1991_13.6_55.0333"   
#> [19] "1991_13.85_55.1"      "1991_13.9042_55.0183" "1991_13.2772_55.21"  
#> [22] "1991_13.6492_55.2135" "1991_15.6596_55.4523" "1991_15.3273_55.9365"
#> [25] "1991_15.3685_55.95"   "1991_15.9797_55.8632" "1991_16.0537_55.7407"
#> [28] "1991_16.431_55.5713"  "1991_17.0363_55.9378" "1991_18.4345_56.224" 
#> [31] "1991_18.7987_57.1543" "1991_17.668_55.8398"  "1991_16.4383_55.6667"
#> [34] "1991_17.7227_56.0398" "1991_14.4605_55.4568" "1991_14.2167_55.15"  
#> [37] "1991_14.2833_55.1833" "1991_14.2833_55"      "1991_18.9297_57.112" 
#> [40] "1991_18.8237_57.0395" "1991_19.1567_57.3528" "1991_17.0417_57.5"   
#> [43] "1991_14.488_55.6708"  "1991_14.7052_55.574"  "1991_17.5455_57.496" 
#> [46] "1991_18.1203_57.8185" "1991_19.406_57.8703"  "1991_19.5915_57.8896"
#> [49] "1992_14.2833_55.1833" "1992_14.1667_55.1333" "1992_13.6333_55"     
#> [52] "1992_13.1167_54.7167" "1992_13.3333_54.7"    "1992_13.8_54.5167"   
#> [55] "1992_13.55_54.7333"   "1992_14.3_55.0333"    "1992_13.9_54.5"      
#> [58] "1992_13.65_54.6833"   "1992_13.8_54.6333"    "1992_14.35_55.1"     
#> [61] "1992_13.8167_54.75"   "1992_13.7_54.75"      "1992_13.2_54.8667"   
#> [64] "1992_13.1_54.9167"    "1992_13.1167_54.9833" "1992_14.1_54.7"      
#> [67] "1992_14.0333_54.8667" "1992_14.2_54.5"       "1992_14.3667_54.5167"
#> [70] "1992_14.2167_54.6333" "1992_13.9667_54.9333" "1992_13.8833_54.5667"
#> [73] "1992_13.1167_54.65"   "1992_14.1333_54.5667" "1992_13.45_55.0167"  
#> [76] "1992_13.7667_55.1"    "1992_13.55_55.0333"   "1992_13.8667_55.1"

# Keep only the join key and the oxygen value
big_dat_sub_oxy <- big_dat_oxy %>%
  dplyr::select(id_oxy, oxy)

# Collapse to a single oxygen value per id before joining
big_dat_sub_oxy2 <- big_dat_sub_oxy %>%
  distinct(id_oxy, .keep_all = TRUE)
#> distinct: removed 94,317 rows (97%), 3,241 rows remaining
# big_dat_sub_oxy %>% group_by(id_oxy) %>% mutate(n = n()) %>% arrange(desc(n))

# Join the data with raster-derived oxygen with the full cpue data
dat <- left_join(dat, big_dat_sub_oxy2, by = "id_oxy")
#> left_join: added one column (oxy)
#>            > rows only in x    1,305
#>            > rows only in y  (     0)
#>            > matched rows     97,558
#>            >                 ========
#>            > rows total       98,863

# Convert units: the raster oxygen is in mmol/m^3 but the analysis uses ml/L
# (the unit of the original model). The data host converted ml/L -> mmol/m^3
# without accounting for pressure or temperature, so a single constant factor
# inverts it:
# 1 ml/l = 10^3/22.391 = 44.661 μmol/l -> 1 ml/l = 0.044661 mmol/l = 44.661 mmol/m^3
# -> 1 mmol/m^3 = 0.0223909 ml/l
# https://ocean.ices.dk/tools/unitconversion.aspx

dat$oxy <- dat$oxy * 0.0223909

# Drop rows without an oxygen value (hauls whose id had no match in the
# raster-derived data, see the left_join above)
dat <- dat %>% drop_na(oxy)
#> drop_na: removed 1,419 rows (1%), 97,444 rows remaining

Temperature

# Open the netCDF file
ncin <- nc_open("data/NEMO_Nordic_SCOBI/dataset-reanalysis-nemo-monthlymeans_1608127623694.nc")

print(ncin)
#> File data/NEMO_Nordic_SCOBI/dataset-reanalysis-nemo-monthlymeans_1608127623694.nc (NC_FORMAT_CLASSIC):
#> 
#>      1 variables (excluding dimension variables):
#>         float bottomT[longitude,latitude,time]   
#>             standard_name: sea_water_potential_temperature_at_sea_floor
#>             units: degrees_C
#>             long_name: Sea floor potential temperature
#>             missing_value: NaN
#>             _FillValue: NaN
#>             _ChunkSizes: 1
#>              _ChunkSizes: 523
#>              _ChunkSizes: 383
#> 
#>      3 dimensions:
#>         time  Size:324
#>             axis: T
#>             long_name: Validity time
#>             standard_name: time
#>             units: days since 1950-01-01 00:00:00
#>             calendar: gregorian
#>             _ChunkSizes: 512
#>             _CoordinateAxisType: Time
#>             valid_min: 15721.5
#>             valid_max: 25551.5
#>         latitude  Size:523
#>             axis: Y
#>             standard_name: latitude
#>             long_name: latitude
#>             units: degrees_north
#>             _CoordinateAxisType: Lat
#>             valid_min: 48.49169921875
#>             valid_max: 65.8914184570312
#>         longitude  Size:383
#>             standard_name: longitude
#>             long_name: longitude
#>             units: degrees_east
#>             axis: X
#>             _CoordinateAxisType: Lon
#>             valid_min: 9.01375484466553
#>             valid_max: 30.2357654571533
#> 
#>     24 global attributes:
#>         references: http://www.smhi.se
#>         institution: Swedish Meterological and Hydrological Institute
#>         history: See source and creation_date attributees
#>         Conventions: CF-1.5
#>         contact: servicedesk_cmems@mercator-ocean.eu
#>         comment: Provided by SMHI as a Copernicus Marine Environment Monitoring Service production unit
#>         bullentin_type: reanalysis
#>         cmems_product_id: BALTICSEA_REANALYSIS_PHY_003_011
#>         title: CMEMS V4 Reanalysis: NEMO model 3D fields (monthly means)
#>         FROM_ORIGINAL_FILE__easternmost_longitude: 30.2357654571533
#>         FROM_ORIGINAL_FILE__northernmost_latitude: 65.8914184570312
#>         FROM_ORIGINAL_FILE__westernmost_longitude: 9.01375484466553
#>         FROM_ORIGINAL_FILE__southernmost_latitude: 48.49169921875
#>         shallowest_depth: 1.50136542320251
#>         deepest_depth: 711.059204101562
#>         source: SMHI reanalysis run NORDIC-NS2_1d_20191201_20191202
#>         file_quality_index: 1
#>         creation_date: 2020-11-16 UTC
#>         bullentin_date: 20191201
#>         start_date: 2019-12-01 UTC
#>         stop_date: 2019-12-01 UTC
#>         start_time: 00:00 UTC
#>         stop_time: 00:00 UTC
#>         _CoordSysBuilder: ucar.nc2.dataset.conv.CF1Convention

# Get longitude and latitude
lon <- ncvar_get(ncin,"longitude")
nlon <- dim(lon)
head(lon)
#> [1] 9.013755 9.069310 9.124865 9.180420 9.235975 9.291530

lat <- ncvar_get(ncin,"latitude")
nlat <- dim(lat)
head(lat)
#> [1] 48.49170 48.52503 48.55836 48.59170 48.62503 48.65836

# Get time
time <- ncvar_get(ncin,"time")
time
#>   [1] 15721.5 15751.0 15780.5 15811.0 15841.5 15872.0 15902.5 15933.5 15964.0
#>  [10] 15994.5 16025.0 16055.5 16086.5 16116.0 16145.5 16176.0 16206.5 16237.0
#>  [19] 16267.5 16298.5 16329.0 16359.5 16390.0 16420.5 16451.5 16481.0 16510.5
#>  [28] 16541.0 16571.5 16602.0 16632.5 16663.5 16694.0 16724.5 16755.0 16785.5
#>  [37] 16816.5 16846.5 16876.5 16907.0 16937.5 16968.0 16998.5 17029.5 17060.0
#>  [46] 17090.5 17121.0 17151.5 17182.5 17212.0 17241.5 17272.0 17302.5 17333.0
#>  [55] 17363.5 17394.5 17425.0 17455.5 17486.0 17516.5 17547.5 17577.0 17606.5
#>  [64] 17637.0 17667.5 17698.0 17728.5 17759.5 17790.0 17820.5 17851.0 17881.5
#>  [73] 17912.5 17942.0 17971.5 18002.0 18032.5 18063.0 18093.5 18124.5 18155.0
#>  [82] 18185.5 18216.0 18246.5 18277.5 18307.5 18337.5 18368.0 18398.5 18429.0
#>  [91] 18459.5 18490.5 18521.0 18551.5 18582.0 18612.5 18643.5 18673.0 18702.5
#> [100] 18733.0 18763.5 18794.0 18824.5 18855.5 18886.0 18916.5 18947.0 18977.5
#> [109] 19008.5 19038.0 19067.5 19098.0 19128.5 19159.0 19189.5 19220.5 19251.0
#> [118] 19281.5 19312.0 19342.5 19373.5 19403.0 19432.5 19463.0 19493.5 19524.0
#> [127] 19554.5 19585.5 19616.0 19646.5 19677.0 19707.5 19738.5 19768.5 19798.5
#> [136] 19829.0 19859.5 19890.0 19920.5 19951.5 19982.0 20012.5 20043.0 20073.5
#> [145] 20104.5 20134.0 20163.5 20194.0 20224.5 20255.0 20285.5 20316.5 20347.0
#> [154] 20377.5 20408.0 20438.5 20469.5 20499.0 20528.5 20559.0 20589.5 20620.0
#> [163] 20650.5 20681.5 20712.0 20742.5 20773.0 20803.5 20834.5 20864.0 20893.5
#> [172] 20924.0 20954.5 20985.0 21015.5 21046.5 21077.0 21107.5 21138.0 21168.5
#> [181] 21199.5 21229.5 21259.5 21290.0 21320.5 21351.0 21381.5 21412.5 21443.0
#> [190] 21473.5 21504.0 21534.5 21565.5 21595.0 21624.5 21655.0 21685.5 21716.0
#> [199] 21746.5 21777.5 21808.0 21838.5 21869.0 21899.5 21930.5 21960.0 21989.5
#> [208] 22020.0 22050.5 22081.0 22111.5 22142.5 22173.0 22203.5 22234.0 22264.5
#> [217] 22295.5 22325.0 22354.5 22385.0 22415.5 22446.0 22476.5 22507.5 22538.0
#> [226] 22568.5 22599.0 22629.5 22660.5 22690.5 22720.5 22751.0 22781.5 22812.0
#> [235] 22842.5 22873.5 22904.0 22934.5 22965.0 22995.5 23026.5 23056.0 23085.5
#> [244] 23116.0 23146.5 23177.0 23207.5 23238.5 23269.0 23299.5 23330.0 23360.5
#> [253] 23391.5 23421.0 23450.5 23481.0 23511.5 23542.0 23572.5 23603.5 23634.0
#> [262] 23664.5 23695.0 23725.5 23756.5 23786.0 23815.5 23846.0 23876.5 23907.0
#> [271] 23937.5 23968.5 23999.0 24029.5 24060.0 24090.5 24121.5 24151.5 24181.5
#> [280] 24212.0 24242.5 24273.0 24303.5 24334.5 24365.0 24395.5 24426.0 24456.5
#> [289] 24487.5 24517.0 24546.5 24577.0 24607.5 24638.0 24668.5 24699.5 24730.0
#> [298] 24760.5 24791.0 24821.5 24852.5 24882.0 24911.5 24942.0 24972.5 25003.0
#> [307] 25033.5 25064.5 25095.0 25125.5 25156.0 25186.5 25217.5 25247.0 25276.5
#> [316] 25307.0 25337.5 25368.0 25398.5 25429.5 25460.0 25490.5 25521.0 25551.5

tunits <- ncatt_get(ncin,"time","units")
nt <- dim(time)
nt
#> [1] 324
tunits
#> $hasatt
#> [1] TRUE
#> 
#> $value
#> [1] "days since 1950-01-01 00:00:00"

# Get temperature
dname <- "bottomT"

temp_array <- ncvar_get(ncin,dname)
dlname <- ncatt_get(ncin,dname,"long_name")
dunits <- ncatt_get(ncin,dname,"units")
fillvalue <- ncatt_get(ncin,dname,"_FillValue")
dim(temp_array)
#> [1] 383 523 324

# Get global attributes
title <- ncatt_get(ncin,0,"title")
institution <- ncatt_get(ncin,0,"institution")
datasource <- ncatt_get(ncin,0,"source")
references <- ncatt_get(ncin,0,"references")
history <- ncatt_get(ncin,0,"history")
Conventions <- ncatt_get(ncin,0,"Conventions")

# Convert time: split the time units string into fields
tustr <- strsplit(tunits$value, " ")
tdstr <- strsplit(unlist(tustr)[3], "-")
tmonth <- as.integer(unlist(tdstr)[2])
tday <- as.integer(unlist(tdstr)[3])
tyear <- as.integer(unlist(tdstr)[1])

# Here I deviate from the guide a little bit. Save this info:
dates <- chron(time, origin = c(tmonth, tday, tyear))

# Crop the date variable
months <- as.numeric(substr(dates, 2, 3))
years <- as.numeric(substr(dates, 8, 9))
years <- ifelse(years > 90, 1900 + years, 2000 + years)

# Replace netCDF fill values with NA's
temp_array[temp_array == fillvalue$value] <- NA

# We only use Quarter 4 in this analysis, so now we want to loop through each time step,
# and if it is a good month save it as a raster.
# First get the index of months that correspond to Q4
months
#>   [1]  1  2  3  4  5  6  7  8  9 10 11 12  1  2  3  4  5  6  7  8  9 10 11 12  1
#>  [26]  2  3  4  5  6  7  8  9 10 11 12  1  2  3  4  5  6  7  8  9 10 11 12  1  2
#>  [51]  3  4  5  6  7  8  9 10 11 12  1  2  3  4  5  6  7  8  9 10 11 12  1  2  3
#>  [76]  4  5  6  7  8  9 10 11 12  1  2  3  4  5  6  7  8  9 10 11 12  1  2  3  4
#> [101]  5  6  7  8  9 10 11 12  1  2  3  4  5  6  7  8  9 10 11 12  1  2  3  4  5
#> [126]  6  7  8  9 10 11 12  1  2  3  4  5  6  7  8  9 10 11 12  1  2  3  4  5  6
#> [151]  7  8  9 10 11 12  1  2  3  4  5  6  7  8  9 10 11 12  1  2  3  4  5  6  7
#> [176]  8  9 10 11 12  1  2  3  4  5  6  7  8  9 10 11 12  1  2  3  4  5  6  7  8
#> [201]  9 10 11 12  1  2  3  4  5  6  7  8  9 10 11 12  1  2  3  4  5  6  7  8  9
#> [226] 10 11 12  1  2  3  4  5  6  7  8  9 10 11 12  1  2  3  4  5  6  7  8  9 10
#> [251] 11 12  1  2  3  4  5  6  7  8  9 10 11 12  1  2  3  4  5  6  7  8  9 10 11
#> [276] 12  1  2  3  4  5  6  7  8  9 10 11 12  1  2  3  4  5  6  7  8  9 10 11 12
#> [301]  1  2  3  4  5  6  7  8  9 10 11 12  1  2  3  4  5  6  7  8  9 10 11 12

# Keep only Quarter 4 (months 10-12)
index_keep <- which(months > 9)

# Subset the temperature array to the Q4 time slices
temp_q4 <- temp_array[, , index_keep]

months_keep <- months[index_keep]

years_keep <- years[index_keep]

# Now we have an array with only Q4 data; average the three Q4 months
# within each year. The Q4 slices come in consecutive Oct/Nov/Dec triplets,
# so step through the third dimension three slices at a time.
loop_seq <- seq(1, dim(temp_q4)[3], by = 3)

# Preallocate: one list element per year, holding the Q4 mean temperature
# matrix (the temp_1x scratch variables are assigned inside the loop)
dlist <- vector("list", length(loop_seq))

for (i in loop_seq) {

  temp_10 <- temp_q4[, , i]
  temp_11 <- temp_q4[, , i + 1]
  temp_12 <- temp_q4[, , i + 2]

  temp_ave <- (temp_10 + temp_11 + temp_12) / 3

  # i = 1, 4, 7, ... maps to list position 1, 2, 3, ...
  # Integer arithmetic replaces the old floating-point index
  # ((i/3) - (1/3)) + 1, which relied on rounding landing exactly on an
  # integer before [[ ]] truncation.
  list_pos <- (i - 1L) %/% 3L + 1L

  dlist[[list_pos]] <- temp_ave

}

# Name each list element with its year. years_keep repeats each year three
# times (once per Q4 month), so unique() yields one label per list element,
# in the same order the loop above filled dlist.
names(dlist) <- unique(years_keep)

# Now I need to make a loop where I extract the raster value for each year...
# The cpue data is called dat so far in this script

# Filter years in the cpue data frame to only have the years I have temperature for
# (year is integer and names(dlist) character; %in% coerces for the comparison)
d_sub_temp <- dat %>% filter(year %in% names(dlist)) %>% droplevels()
#> filter: no rows removed

# Holder for the per-year point data (coordinates + extracted temperature)
data_list <- list()

# ... and for the per-year temperature raster, as a data frame for plotting
raster_list <- list()

# Create factor year for indexing the list in the loop
d_sub_temp$year_f <- as.factor(d_sub_temp$year)

# Loop through each year and extract raster values for the cpue data points.
# For each year: (1) rasterize that year's Q4 mean temperature matrix,
# (2) extract the raster value at every haul position, (3) store the point
# data and the raster as data frames, (4) save a map as a visual check.

# First year in the data; used to turn the year label into a 1-based list
# index (replaces the previous hard-coded "- 1992" offset)
base_year <- min(as.numeric(as.character(unique(d_sub_temp$year_f))))

for(i in unique(d_sub_temp$year_f)) {
  
  # Subset a year
  temp_slice <- dlist[[i]]
  
  # Create raster for that year (i); the matrix is [lon, lat], so transpose
  # to raster's [row, col] orientation.
  # NOTE: fixed malformed PROJ string ("+no_defs+ towgs84" -> "+no_defs +towgs84")
  r <- raster(t(temp_slice), xmn = min(lon), xmx = max(lon), ymn = min(lat), ymx = max(lat),
              crs = CRS("+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs +towgs84=0,0,0"))
  
  # Flip so that latitude increases northwards
  r <- flip(r, direction = 'y')
  
  #plot(r, main = i)
  
  # Filter the same year (i) in the cpue data and select only coordinates
  d_slice <- d_sub_temp %>% filter(year_f == i) %>% dplyr::select(lon, lat)
  
  # Make into a SpatialPoints object
  data_sp <- SpatialPoints(d_slice)
  
  # Extract raster value (temperature) at each haul position
  rasValue <- raster::extract(r, data_sp)
  
  # Add in the raster value in the df holding the coordinates for the cpue data
  d_slice$temp <- rasValue
  
  # Add in which year
  d_slice$year <- i
  
  # The loop index i is a year label, not a counter, so compute a continuous
  # 1-based position for the storage lists from the year itself
  index <- as.numeric(d_slice$year)[1] - base_year + 1
  
  # Add each years' data in the list
  data_list[[index]] <- d_slice
  
  # Convert the raster to points so we can plot it with ggplot
  map.p <- rasterToPoints(r)
  
  # Make the points a dataframe for ggplot
  df_rast <- data.frame(map.p)
  
  # Rename y-variable and add year
  df_rast <- df_rast %>% rename("temp" = "layer") %>% mutate(year = i)
  
  # Add each years' raster data frame in the list
  raster_list[[index]] <- df_rast
  
  # Rename the coordinate columns for plotting (was a length-3 colnames<-
  # on a 4-column data frame, which left the year column NA-named)
  df_rast <- df_rast %>% rename("Longitude" = "x", "Latitude" = "y")
  
  # Now make the map
  ggplot(data = df_rast, aes(y = Latitude, x = Longitude)) +
    geom_raster(aes(fill = temp)) +
    geom_point(data = d_slice, aes(x = lon, y = lat, fill = temp),
               color = "black", size = 5, shape = 21) +
    theme_bw() +
    geom_sf(data = world, inherit.aes = F, size = 0.2) +
    coord_sf(xlim = c(min(dat$lon), max(dat$lon)),
             ylim = c(min(dat$lat), max(dat$lat))) +
    scale_colour_gradientn(colours = rev(terrain.colors(10)),
                           limits = c(2, 17)) +
    scale_fill_gradientn(colours = rev(terrain.colors(10)),
                         limits = c(2, 17)) +
    NULL
  
  ggsave(paste("figures/supp/cpue_temp_rasters/", i,".png", sep = ""),
         width = 6.5, height = 6.5, dpi = 600)
  
}
#> filter: removed 96,260 rows (99%), 1,184 rows remaining
#> rename: renamed one variable (temp)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> filter: removed 95,617 rows (98%), 1,827 rows remaining
#> rename: renamed one variable (temp)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> filter: removed 95,414 rows (98%), 2,030 rows remaining
#> rename: renamed one variable (temp)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> filter: removed 95,760 rows (98%), 1,684 rows remaining
#> rename: renamed one variable (temp)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> filter: removed 95,357 rows (98%), 2,087 rows remaining
#> rename: renamed one variable (temp)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> filter: removed 95,643 rows (98%), 1,801 rows remaining
#> rename: renamed one variable (temp)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> filter: removed 95,539 rows (98%), 1,905 rows remaining
#> rename: renamed one variable (temp)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> filter: removed 94,300 rows (97%), 3,144 rows remaining
#> rename: renamed one variable (temp)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> filter: removed 95,411 rows (98%), 2,033 rows remaining
#> rename: renamed one variable (temp)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> filter: removed 95,517 rows (98%), 1,927 rows remaining
#> rename: renamed one variable (temp)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> filter: removed 94,035 rows (97%), 3,409 rows remaining
#> rename: renamed one variable (temp)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> filter: removed 92,577 rows (95%), 4,867 rows remaining
#> rename: renamed one variable (temp)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> filter: removed 91,606 rows (94%), 5,838 rows remaining
#> rename: renamed one variable (temp)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> filter: removed 91,782 rows (94%), 5,662 rows remaining
#> rename: renamed one variable (temp)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> filter: removed 90,456 rows (93%), 6,988 rows remaining
#> rename: renamed one variable (temp)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> filter: removed 91,153 rows (94%), 6,291 rows remaining
#> rename: renamed one variable (temp)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> filter: removed 93,444 rows (96%), 4,000 rows remaining
#> rename: renamed one variable (temp)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> filter: removed 92,677 rows (95%), 4,767 rows remaining
#> rename: renamed one variable (temp)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> filter: removed 93,361 rows (96%), 4,083 rows remaining
#> rename: renamed one variable (temp)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> filter: removed 94,573 rows (97%), 2,871 rows remaining
#> rename: renamed one variable (temp)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> filter: removed 94,172 rows (97%), 3,272 rows remaining
#> rename: renamed one variable (temp)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> filter: removed 93,243 rows (96%), 4,201 rows remaining
#> rename: renamed one variable (temp)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> filter: removed 93,184 rows (96%), 4,260 rows remaining
#> rename: renamed one variable (temp)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> filter: removed 91,265 rows (94%), 6,179 rows remaining
#> rename: renamed one variable (temp)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> filter: removed 92,632 rows (95%), 4,812 rows remaining
#> rename: renamed one variable (temp)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> filter: removed 94,156 rows (97%), 3,288 rows remaining
#> rename: renamed one variable (temp)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> filter: removed 94,410 rows (97%), 3,034 rows remaining
#> rename: renamed one variable (temp)
#> mutate: new variable 'year' (character) with one unique value and 0% NA

# Now create a data frame from the list of all annual values
big_dat_temp <- dplyr::bind_rows(data_list)
big_raster_dat_temp <- dplyr::bind_rows(raster_list)

big_dat_temp %>% drop_na(temp) %>% summarise(max = max(temp))
#> drop_na: no rows removed
#> summarise: now one row and one column, ungrouped
#> # A tibble: 1 x 1
#>     max
#>   <dbl>
#> 1  14.5
big_dat_temp %>% drop_na(temp) %>% summarise(min = min(temp))
#> drop_na: no rows removed
#> summarise: now one row and one column, ungrouped
#> # A tibble: 1 x 1
#>     min
#>   <dbl>
#> 1  3.53

# Plot data, looks like there's big inter-annual variation but a positive trend
# Mean temperature per year across the whole raster, then a linear trend
annual_mean_temp <- big_raster_dat_temp %>%
  drop_na(temp) %>%
  group_by(year) %>%
  summarise(mean_temp = mean(temp)) %>%
  mutate(year_num = as.numeric(year))

ggplot(annual_mean_temp, aes(year_num, mean_temp)) +
  geom_point(size = 2) +
  stat_smooth(method = "lm")
#> group_by: one grouping variable (year)
#> drop_na (grouped): no rows removed
#> summarise: now 27 rows and 2 columns, ungrouped
#> mutate: new variable 'year_num' (double) with 27 unique values and 0% NA
#> `geom_smooth()` using formula 'y ~ x'


# Now add in the new temperature column in the original data:
str(d_sub_temp)
#> tibble [97,444 × 13] (S3: tbl_df/tbl/data.frame)
#>  $ weight_g : num [1:97444] 1176 1187 1199 1219 1433 ...
#>  $ length_cm: num [1:97444] 51 51 51 51 53 55 58 61 8 34 ...
#>  $ year     : int [1:97444] 1993 1993 1993 1993 1993 1993 1993 1993 1993 1993 ...
#>  $ lat      : num [1:97444] 54.7 54.7 54.7 54.7 54.7 ...
#>  $ lon      : num [1:97444] 14.1 14.1 14.1 14.1 14.1 ...
#>  $ quarter  : int [1:97444] 4 4 4 4 4 4 4 4 4 4 ...
#>  $ IDx      : chr [1:97444] "1993.4.GFR.06S1.H20.28.41" "1993.4.GFR.06S1.H20.28.41" "1993.4.GFR.06S1.H20.28.41" "1993.4.GFR.06S1.H20.28.41" ...
#>  $ ices_rect: chr [1:97444] "38G4" "38G4" "38G4" "38G4" ...
#>  $ SD       : chr [1:97444] "24" "24" "24" "24" ...
#>  $ depth    : num [1:97444] 18 18 18 18 18 18 18 18 10 10 ...
#>  $ id_oxy   : chr [1:97444] "1993_14.1333_54.7" "1993_14.1333_54.7" "1993_14.1333_54.7" "1993_14.1333_54.7" ...
#>  $ oxy      : num [1:97444] 7.32 7.32 7.32 7.32 7.32 ...
#>  $ year_f   : Factor w/ 27 levels "1993","1994",..: 1 1 1 1 1 1 1 1 1 1 ...
str(big_dat_temp)
#> tibble [97,444 × 4] (S3: tbl_df/tbl/data.frame)
#>  $ lon : num [1:97444] 14.1 14.1 14.1 14.1 14.1 ...
#>  $ lat : num [1:97444] 54.7 54.7 54.7 54.7 54.7 ...
#>  $ temp: num [1:97444] 7.86 7.86 7.86 7.86 7.86 ...
#>  $ year: chr [1:97444] "1993" "1993" "1993" "1993" ...

# Build a year_lon_lat key for matching the raster-derived temperature
# values to the cpue data
dat <- dat %>% mutate(id_temp = paste(year, lon, lat, sep = "_"))
big_dat_temp <- big_dat_temp %>% mutate(id_temp = paste(year, lon, lat, sep = "_"))

# Which id's are not in the cpue data (dat)? (It's because I don't have those years, not about the location)
ids <- dat$id_temp[!dat$id_temp %in% big_dat_temp$id_temp]

unique(ids)
#> character(0)

# Select only the columns we want to merge
big_dat_sub_temp <- big_dat_temp %>% dplyr::select(id_temp, temp)

# Remove duplicate ID (one temp value per id)
big_dat_sub_temp2 <- big_dat_sub_temp %>% distinct(id_temp, .keep_all = TRUE)
#> distinct: removed 94,211 rows (97%), 3,233 rows remaining

# Join the data with raster-derived oxygen with the full cpue data
dat <- left_join(dat, big_dat_sub_temp2, by = "id_temp")
#> left_join: added one column (temp)
#>            > rows only in x        0
#>            > rows only in y  (     0)
#>            > matched rows     97,444
#>            >                 ========
#>            > rows total       97,444

colnames(dat)
#>  [1] "weight_g"  "length_cm" "year"      "lat"       "lon"       "quarter"  
#>  [7] "IDx"       "ices_rect" "SD"        "depth"     "id_oxy"    "oxy"      
#> [13] "id_temp"   "temp"

dat <- dat %>% dplyr::select(-id_temp, -id_oxy)

# Drop NA temp
dat <- dat %>% drop_na(temp)
#> drop_na: no rows removed

Add UTM coords

# Helper to convert longitude/latitude (WGS84) to UTM coordinates
LongLatToUTM <- function(x, y, zone){
  # Convert vectors of longitude (x) and latitude (y) in WGS84 to UTM
  # coordinates (metres) in the given zone.
  # Returns a data frame with columns ID, X, Y.
  xy <- data.frame(ID = seq_along(x), X = x, Y = y)
  coordinates(xy) <- c("X", "Y")
  proj4string(xy) <- CRS("+proj=longlat +datum=WGS84")
  # NOTE: fixed malformed PROJ string -- " ellps=WGS84" was missing its "+"
  # prefix, so the ellipsoid token was not a valid PROJ parameter
  res <- spTransform(xy, CRS(paste0("+proj=utm +zone=", zone, " +ellps=WGS84")))
  return(as.data.frame(res))
}

utm_coords <- LongLatToUTM(dat$lon, dat$lat, zone = 33)
#> Warning in showSRID(uprojargs, format = "PROJ", multiline = "NO", prefer_proj =
#> prefer_proj): Discarded datum Unknown based on WGS84 ellipsoid in CRS definition
dat$X <- utm_coords$X/1000 # for computational reasons
dat$Y <- utm_coords$Y/1000 # for computational reasons

Add cod and flounder density covariates

First fit CPUE models for cod and flounder, then predict densities at the locations in `dat`.

d <- readr::read_csv("https://raw.githubusercontent.com/maxlindmark/cod_condition/master/data/for_analysis/mdat_cpue.csv")

# Calculate standardized covariates. Drop rows with missing covariates
# *before* scaling, so the means/SDs used by scale() here are computed on
# exactly the rows kept for fitting -- i.e. the same mean(d$oxy)/sd(d$oxy)
# etc. that are applied to the prediction data (dat) below. (Previously
# scaling ran first, over a potentially different row set.)
# across() replaces the superseded mutate_at().
d <- d %>%
  drop_na(oxy, depth, temp) %>%
  mutate(across(c(oxy, temp, depth),
                ~ as.vector(scale(.x)),
                .names = "{.col}_sc")) %>%
  mutate(year = as.integer(year)) %>%
  rename("density_cod" = "density") # to fit better with how flounder is named

# Also standardize the condition data (which we predict on), using the
# fitting data's means and standard deviations so both are on one scale
standardize <- function(x, ref) (x - mean(ref)) / sd(ref)

dat <- dat %>%
  mutate(oxy_sc = standardize(oxy, d$oxy),
         temp_sc = standardize(temp, d$temp),
         depth_sc = standardize(depth, d$depth),
         year = as.integer(year))

hist(dat$oxy_sc)

hist(dat$temp_sc)

hist(dat$depth_sc)


# Explore data a bit
ggplot(d) + 
  geom_point(aes(year, density_cod, color = factor(SD))) + 
  facet_wrap(~SD)


# Explore data a bit
ggplot(d) + 
  geom_point(aes(year, density_fle, color = factor(SD))) + 
  facet_wrap(~SD)


# Make mesh: 200-knot kmeans mesh on the X/Y (UTM km) coordinates of the
# fitting data, shared by the cod and flounder models below. The fixed seed
# makes the kmeans knot placement reproducible.
spde <- make_mesh(d, xy_cols = c("X", "Y"),
                  n_knots = 200, 
                  type = "kmeans", seed = 42)

plot(spde)


# Depth spline + oxy spline
# Cod model: Tweedie (log link) CPUE model with year fixed effects (no
# intercept), smoothers for depth/oxygen/temperature, a static spatial field
# (include_spatial) plus AR1 spatiotemporal fields over year.
# NOTE(review): s(..., k = 3) now only caps the basis size -- smoothers are
# penalized as of sdmTMB 0.0.19 (see the warning emitted below).
mcod <- sdmTMB(density_cod ~ 0 + as.factor(year) + s(depth_sc, k = 3) + s(oxy_sc, k = 3) + s(temp_sc, k = 3),
               data = d, spde = spde, family = tweedie(link = "log"),
               fields = "AR1", include_spatial = TRUE, time = "year",
               spatial_only = FALSE, reml = FALSE,
               control = sdmTMBcontrol(newton_steps = 1))
#> Warning: Detected a `s()` smoother. Smoothers are penalized in sdmTMB as
#> of version 0.0.19, but used to be unpenalized.
#> You no longer need to specify `k`, specifing `k` now indicates
#> an upper limit on the basis functions, and the degree of wiggliness
#> is determined by the data.

tidy(mcod, conf.int = TRUE)
#>                   term estimate std.error conf.low conf.high
#> 1  as.factor(year)1993 4.669278 0.4290490 3.828358  5.510199
#> 2  as.factor(year)1994 5.096382 0.4102930 4.292222  5.900542
#> 3  as.factor(year)1995 5.569538 0.4033938 4.778901  6.360175
#> 4  as.factor(year)1996 5.062511 0.4049442 4.268835  5.856187
#> 5  as.factor(year)1997 4.356420 0.3877978 3.596350  5.116489
#> 6  as.factor(year)1998 4.421859 0.3943089 3.649028  5.194691
#> 7  as.factor(year)1999 4.290892 0.3823467 3.541506  5.040278
#> 8  as.factor(year)2000 4.648275 0.3623439 3.938094  5.358456
#> 9  as.factor(year)2001 5.019057 0.3520562 4.329040  5.709075
#> 10 as.factor(year)2002 5.583997 0.3423569 4.912990  6.255004
#> 11 as.factor(year)2003 4.404675 0.3672783 3.684823  5.124528
#> 12 as.factor(year)2004 4.879646 0.3706699 4.153146  5.606146
#> 13 as.factor(year)2005 5.319951 0.3383369 4.656823  5.983079
#> 14 as.factor(year)2006 5.019436 0.3208472 4.390587  5.648285
#> 15 as.factor(year)2007 5.280742 0.3169020 4.659625  5.901858
#> 16 as.factor(year)2008 5.414750 0.3119337 4.803371  6.026129
#> 17 as.factor(year)2009 5.386120 0.3198729 4.759181  6.013059
#> 18 as.factor(year)2010 5.512173 0.3229510 4.879201  6.145146
#> 19 as.factor(year)2011 4.769352 0.3193194 4.143497  5.395206
#> 20 as.factor(year)2012 4.346913 0.3272673 3.705481  4.988345
#> 21 as.factor(year)2013 4.769957 0.3239729 4.134982  5.404932
#> 22 as.factor(year)2014 4.497734 0.3228297 3.864999  5.130469
#> 23 as.factor(year)2015 4.627084 0.3233500 3.993330  5.260838
#> 24 as.factor(year)2016 3.949509 0.3235552 3.315353  4.583666
#> 25 as.factor(year)2017 4.478004 0.3202236 3.850378  5.105631
#> 26 as.factor(year)2018 3.530848 0.3255207 2.892839  4.168857
#> 27 as.factor(year)2019 3.515000 0.3766067 2.776865  4.253136
#> 28       s(depth_sc).1       NA        NA       NA        NA
#> 29       s(depth_sc).2       NA        NA       NA        NA
#> 30         s(oxy_sc).1       NA        NA       NA        NA
#> 31         s(oxy_sc).2       NA        NA       NA        NA
#> 32        s(temp_sc).1       NA        NA       NA        NA
#> 33        s(temp_sc).2       NA        NA       NA        NA

# Check residuals of models
# residuals() on the fitted sdmTMB cod model; these are assumed to be
# (randomized-)quantile residuals that should be ~ N(0, 1) if the model is
# consistent with the data — hence the 1:1 reference line in the QQ-plot.
# TODO confirm the residual type for this sdmTMB version.
d$residualsmcod <- residuals(mcod)
#> Warning: Detected a `s()` smoother. Smoothers are penalized in sdmTMB as
#> of version 0.0.19, but used to be unpenalized.
#> You no longer need to specify `k`, specifing `k` now indicates
#> an upper limit on the basis functions, and the degree of wiggliness
#> is determined by the data.
qqnorm(d$residualsmcod); abline(a = 0, b = 1)


# Flounder CPUE model: Tweedie GLMM (log link) with a separate mean per year,
# penalized smooths (k = 3 as an upper limit) of scaled depth, oxygen and
# temperature, a static spatial field plus AR1 spatiotemporal fields, fit by
# ML (reml = FALSE) with one extra Newton step to polish the optimum.
mfle <- sdmTMB(density_fle ~ 0 + as.factor(year) + s(depth_sc, k = 3) + s(oxy_sc, k = 3) + s(temp_sc, k = 3),
               data = d, spde = spde, family = tweedie(link = "log"),
               fields = "AR1", include_spatial = TRUE, time = "year",
               spatial_only = FALSE, reml = FALSE,
               control = sdmTMBcontrol(newton_steps = 1))
#> Warning: Detected a `s()` smoother. Smoothers are penalized in sdmTMB as
#> of version 0.0.19, but used to be unpenalized.
#> You no longer need to specify `k`, specifing `k` now indicates
#> an upper limit on the basis functions, and the degree of wiggliness
#> is determined by the data.
#> Warning: The model may not have converged. Maximum final gradient:
#> 0.0197440872310777.
# NOTE(review): the max-gradient warning above (~0.02) suggests marginal
# convergence; consider more newton_steps or a simpler model — TODO confirm.

# Fixed-effect estimates with 95% CIs (smoother terms report NA here).
tidy(mfle, conf.int = TRUE)
#>                   term estimate std.error conf.low conf.high
#> 1  as.factor(year)1993 3.086344 0.5000604 2.106244  4.066445
#> 2  as.factor(year)1994 2.862403 0.4982786 1.885795  3.839011
#> 3  as.factor(year)1995 3.834157 0.4826551 2.888170  4.780144
#> 4  as.factor(year)1996 3.488019 0.4869127 2.533688  4.442351
#> 5  as.factor(year)1997 3.402728 0.4630563 2.495154  4.310302
#> 6  as.factor(year)1998 2.792034 0.4676152 1.875525  3.708543
#> 7  as.factor(year)1999 2.268694 0.4644380 1.358412  3.178975
#> 8  as.factor(year)2000 3.013916 0.4451605 2.141417  3.886414
#> 9  as.factor(year)2001 3.388237 0.4380149 2.529743  4.246730
#> 10 as.factor(year)2002 4.015457 0.4294779 3.173696  4.857218
#> 11 as.factor(year)2003 3.104267 0.4412371 2.239458  3.969076
#> 12 as.factor(year)2004 3.637542 0.4477572 2.759954  4.515130
#> 13 as.factor(year)2005 3.511923 0.4221554 2.684514  4.339333
#> 14 as.factor(year)2006 3.529843 0.4050171 2.736024  4.323662
#> 15 as.factor(year)2007 3.708212 0.4028041 2.918730  4.497693
#> 16 as.factor(year)2008 3.793654 0.3996016 3.010449  4.576859
#> 17 as.factor(year)2009 3.887496 0.4047775 3.094147  4.680846
#> 18 as.factor(year)2010 4.329963 0.4028294 3.540432  5.119494
#> 19 as.factor(year)2011 3.884919 0.3994758 3.101961  4.667877
#> 20 as.factor(year)2012 3.431478 0.4023971 2.642794  4.220162
#> 21 as.factor(year)2013 3.730560 0.4046115 2.937536  4.523584
#> 22 as.factor(year)2014 3.293708 0.4027768 2.504280  4.083136
#> 23 as.factor(year)2015 3.465576 0.4023112 2.677061  4.254092
#> 24 as.factor(year)2016 3.346314 0.4008326 2.560696  4.131931
#> 25 as.factor(year)2017 3.571983 0.4026322 2.782838  4.361127
#> 26 as.factor(year)2018 3.296749 0.4058768 2.501245  4.092253
#> 27 as.factor(year)2019 3.392583 0.4387341 2.532680  4.252486
#> 28       s(depth_sc).1       NA        NA       NA        NA
#> 29       s(depth_sc).2       NA        NA       NA        NA
#> 30         s(oxy_sc).1       NA        NA       NA        NA
#> 31         s(oxy_sc).2       NA        NA       NA        NA
#> 32        s(temp_sc).1       NA        NA       NA        NA
#> 33        s(temp_sc).2       NA        NA       NA        NA

# Check residuals of models
# Same check as for the cod model: QQ-plot of the flounder model residuals
# against the 1:1 line (residuals should be ~ N(0, 1) if the model fits).
d$residualsmfle <- residuals(mfle)
#> Warning: Detected a `s()` smoother. Smoothers are penalized in sdmTMB as
#> of version 0.0.19, but used to be unpenalized.
#> You no longer need to specify `k`, specifing `k` now indicates
#> an upper limit on the basis functions, and the degree of wiggliness
#> is determined by the data.
qqnorm(d$residualsmfle); abline(a = 0, b = 1)

I want to explore the flounder model a little more before I trust it because, in contrast to the cod model, I have not yet examined it in detail.

# Read in pred_grid2 which has oxygen values at location and time and depth:
# Prediction grid with yearly environmental covariates, hosted on the
# project's GitHub repository (presumably a 2 x 2 km grid, cf. the
# area = 2*2 used when predicting below — TODO confirm resolution).
pred_grid2 <- readr::read_csv("https://raw.githubusercontent.com/maxlindmark/cod_condition/master/data/for_analysis/pred_grid2.csv")
#> 
#> ── Column specification ────────────────────────────────────────────────────────
#> cols(
#>   X = col_double(),
#>   Y = col_double(),
#>   depth = col_double(),
#>   year = col_double(),
#>   deep = col_character(),
#>   oxy = col_double(),
#>   temp = col_double(),
#>   lon = col_double(),
#>   lat = col_double(),
#>   subdiv = col_character(),
#>   subdiv2 = col_character(),
#>   SubDiv = col_double()
#> )

# Standardize data with respect to prediction grid:
# The grid covariates are scaled with the mean/sd of the *survey data* d,
# because the models were fitted on covariates scaled that way — predictions
# on the grid must use the identical scaling.
pred_grid2 <- pred_grid2 %>%
  mutate(year = as.integer(year)) %>% 
  filter(year %in% c(unique(d$year))) %>% 
  mutate(depth_sc = (depth - mean(d$depth))/sd(d$depth),
         temp_sc = (temp - mean(d$temp))/sd(d$temp),
         oxy_sc = (oxy - mean(d$oxy))/sd(d$oxy)) %>% # Need to scale these to the mean and sd in the data!
  drop_na(oxy, depth, temp)
#> mutate: converted 'year' from double to integer (0 new NA)
#> filter: no rows removed
#> mutate: new variable 'depth_sc' (double) with 6,565 unique values and 0% NA
#>         new variable 'temp_sc' (double) with 252,019 unique values and 3% NA
#>         new variable 'oxy_sc' (double) with 252,208 unique values and 3% NA
#> drop_na: removed 9,477 rows (4%), 250,587 rows remaining

# Predict on the pred grid, calculate index
# area = 2*2 weights each grid cell by 4 km^2 when get_index() sums the
# predicted densities (kg/km^2) into a yearly biomass index (assumes 2 x 2 km
# cells — TODO confirm grid resolution).
preds_fle <- predict(mfle, newdata = pred_grid2, return_tmb_object = TRUE, area = 2*2)
#> Warning: Detected a `s()` smoother. Smoothers are penalized in sdmTMB as
#> of version 0.0.19, but used to be unpenalized.
#> You no longer need to specify `k`, specifing `k` now indicates
#> an upper limit on the basis functions, and the degree of wiggliness
#> is determined by the data.
index <- get_index(preds_fle, bias_correct = FALSE)
#> Warning: The model may not have converged. Maximum final gradient:
#> 0.0197440866758738.
# Divide by 1000 to convert kg to tonnes for plotting.
index <- index %>% mutate(est_t = est/1000, lwr_t = lwr/1000, upr_t = upr/1000)
#> mutate: new variable 'est_t' (double) with 27 unique values and 0% NA
#>         new variable 'lwr_t' (double) with 27 unique values and 0% NA
#>         new variable 'upr_t' (double) with 27 unique values and 0% NA

# Plot index
# Mapping the constant "blue" to colour/fill is a trick to get a single
# series that picks up the Dark2 palette; the colour legend is suppressed.
# Fix: guides(color = FALSE) uses a deprecated logical (since ggplot2 3.3.4);
# the supported value is "none".
ggplot() +
  geom_line(data = index, aes(year, est_t, color = "blue")) +
  geom_ribbon(data = index, aes(year, ymin = lwr_t, ymax = upr_t, fill = "blue"), alpha = 0.4) +
  scale_x_continuous(breaks = scales::pretty_breaks(n = 20)) +
  scale_color_brewer(palette = "Dark2") +
  scale_fill_brewer(palette = "Dark2") +
  guides(color = "none") + 
  labs(x = "Year", y = "Tonnes", fill = "Sub Divisions", label = "") +
  theme_light(base_size = 11) +
  theme(axis.text.x = element_text(angle = 30),
        legend.position = c(0.8, 0.8),
        legend.background = element_blank()) +
  NULL


# Calculate an index that corresponds to the average so that I can compare it to the data
# area = 1/ncells makes get_index() return (approximately) the mean predicted
# density across the grid rather than a total.
# NOTE(review): ncells is taken from the final year only; if the number of
# grid rows differs between years (drop_na removed ~4% of rows), other years'
# averages will be slightly off — TODO confirm cells per year are equal.
ncells <- filter(pred_grid2, year == max(pred_grid2$year)) %>% nrow()
#> filter: removed 241,306 rows (96%), 9,281 rows remaining
preds_fle_ave <- predict(mfle, newdata = pred_grid2, return_tmb_object = TRUE, area = 1/ncells)
#> Warning: Detected a `s()` smoother. Smoothers are penalized in sdmTMB as
#> of version 0.0.19, but used to be unpenalized.
#> You no longer need to specify `k`, specifing `k` now indicates
#> an upper limit on the basis functions, and the degree of wiggliness
#> is determined by the data.
index_ave <- get_index(preds_fle_ave, bias_correct = FALSE)
#> Warning: The model may not have converged. Maximum final gradient:
#> 0.0197440866758738.

# Annual mean observed flounder density in the survey data, for comparison
# with the model-based average index above.
d_sum <- d %>%
  ungroup() %>%
  group_by(year) %>%
  summarise(mean_density_fle = mean(density_fle))
#> ungroup: no grouping variables
#> group_by: one grouping variable (year)
#> summarise: now 27 rows and 2 columns, ungrouped

# Compare the model-predicted average density with the raw annual mean.
# Fix 1: `size = 1.1` was mapped inside aes(), which treats a constant as an
# aesthetic and creates a spurious size legend; constants belong outside aes().
# Fix 2: guides(fill = FALSE) uses a deprecated logical (since ggplot2 3.3.4);
# the supported value is "none".
ggplot(index_ave, aes(year, est, color = "prediction")) +
  geom_line() +
  geom_ribbon(aes(ymin = lwr, ymax = upr, fill = "prediction"), alpha = 0.1) +
  geom_point(data = d_sum, aes(year, mean_density_fle, color = "data"), size = 1.1) +
  geom_line(data = d_sum, aes(year, mean_density_fle, color = "data"), linetype = 2) +
  scale_color_brewer(palette = "Dark2") +
  scale_fill_brewer(palette = "Dark2") +
  guides(fill = "none") +
  labs(x = "Year", y = expression(paste("Density [kg/", km^2, "]", sep = ""))) +
  NULL


# Map the predicted flounder density (fixed + random effects) for every year.
# X/Y appear to be in km (UTM 33), so they are multiplied by 1000 to match
# the m-scale coastline polygon — TODO confirm units.
ggplot(swe_coast_proj) +
  geom_raster(
    data = preds_fle$data,
    aes(x = 1000 * X, y = 1000 * Y, fill = exp(est))
  ) +
  geom_sf(size = 0.3) +
  scale_fill_viridis_c(trans = "sqrt") +
  facet_wrap(~year, ncol = 5) +
  labs(
    x = "Longitude",
    y = "Latitude",
    fill = expression(kg/km^2)
  ) +
  ggtitle("Predicted density (fixed + random)")


## Interesting! Now predict from these models on dat - i.e. the condition data - so that the predicted densities can be used as covariates

Now make predictions from the CPUE models at the haul locations in the condition data

# Predict cod and flounder CPUE at the condition-data locations (dat) so the
# predicted densities can be used as covariates in the condition model.
predict_mcod <- predict(mcod, newdata = dat)
#> Warning: Detected a `s()` smoother. Smoothers are penalized in sdmTMB as
#> of version 0.0.19, but used to be unpenalized.
#> You no longer need to specify `k`, specifing `k` now indicates
#> an upper limit on the basis functions, and the degree of wiggliness
#> is determined by the data.
predict_mfle <- predict(mfle, newdata = dat)
#> Warning: Detected a `s()` smoother. Smoothers are penalized in sdmTMB as
#> of version 0.0.19, but used to be unpenalized.
#> You no longer need to specify `k`, specifing `k` now indicates
#> an upper limit on the basis functions, and the degree of wiggliness
#> is determined by the data.

# Add to data
# est is on the log link scale, so exp() back-transforms to density.
dat$density_cod <- exp(predict_mcod$est)
dat$density_fle <- exp(predict_mfle$est)

# Test to plot the relationships between a simple condition index and these variables
# cond = weight / (0.01 * length^3) = 100 * W / L^3, i.e. Fulton's condition
# factor K with weight in g and length in cm.
dat <- dat %>% 
  mutate(cond = weight_g / (0.01*length_cm^3))

# Remove apparent errors!
# Inspect the largest condition values first; the top rows are clearly
# recording errors (e.g. 274 g at 3 cm in row 1 of the output below).
dat %>% arrange(desc(cond)) %>% data.frame() %>% head(20)
#>    weight_g length_cm year     lat     lon quarter                          IDx
#> 1     274.0         3 2015 55.7708 15.2827       4     2015.4.DEN.26D4.TVL.35.4
#> 2    1038.0        20 2015 55.1910 14.3030       4 2015.4.GFR.06SL.TVS.24275.43
#> 3      59.0         8 2015 54.7361 15.9506       4   2015.4.DEN.26D4.TVL.177.29
#> 4    2001.0        26 2007 55.8300 20.1000       4    2007.4.LTU.LTDA.TVS.Nov.7
#> 5     882.0        21 2006 55.7975 16.1985       4    2006.4.DEN.26D4.TVL.51.22
#> 6     314.0        15 2011 55.6498 14.6522       4    2011.4.DEN.26D4.TVL.24.30
#> 7     730.0        20 2006 55.6865 14.4562       4   2006.4.SWE.77AR.TVL.691.27
#> 8       4.0         4 2016 55.6692 14.7286       4     2016.4.DEN.26D4.TVL.18.2
#> 9       4.0         4 2016 55.6692 14.7286       4     2016.4.DEN.26D4.TVL.18.2
#> 10      6.0         5 2019 55.2762 13.7423       4 2019.4.GFR.06SL.TVS.24263.44
#> 11      2.6         4 2016 55.7647 15.3963       4     2016.4.DEN.26D4.TVL.33.5
#> 12      2.6         4 2016 55.7647 15.3963       4     2016.4.DEN.26D4.TVL.33.5
#> 13    199.0        17 2007 57.6500 21.2900       4      2007.4.EST.AA36.TVS.2.9
#> 14      5.0         5 2016 55.6692 14.7286       4     2016.4.DEN.26D4.TVL.18.2
#> 15      5.0         5 2016 55.6692 14.7286       4     2016.4.DEN.26D4.TVL.18.2
#> 16      1.0         3 2005 55.7128 16.7263       4    2005.4.SWE.77AR.TVL.608.9
#> 17      8.0         6 2012 54.8184 14.9531       4   2012.4.DEN.26D4.TVL.114.30
#> 18      1.0         3 2014 55.6985 14.3635       4    2014.4.SWE.26D4.TVL.29.15
#> 19     64.0        12 2015 55.4900 20.6466       4  2015.4.LTU.LTDA.TVS.26153.2
#> 20      1.0         3 2019 55.7087 16.1862       4   2019.4.DEN.26D4.TVL.147.39
#>    ices_rect SD depth      oxy      temp        X        Y      oxy_sc
#> 1       40G5 25    54 4.211386  8.335124 517.7356 6180.607 -0.45068187
#> 2       39G4 24    32 3.580219 12.520613 455.6264 6116.268 -0.73873926
#> 3       38G5 25    40 5.156504  8.856022 561.2060 6065.840 -0.01934049
#> 4       40H0 26    54 6.793494  6.698404 819.3147 6198.930  0.72776304
#> 5       40G6 25    52 2.523286  8.088043 575.1362 6184.192 -1.22111152
#> 6       40G4 25    57 3.699946  6.867472 478.1128 6167.159 -0.68409721
#> 7       40G4 25    59 5.966464  6.644550 465.8105 6171.323  0.35031593
#> 8       40G4 25    60 3.376058 11.666626 482.9291 6169.297 -0.83191612
#> 9       40G4 25    60 3.376058 11.666626 482.9291 6169.297 -0.83191612
#> 10      39G3 24    27 5.246214 13.486569 420.1026 6126.248  0.02160206
#> 11      40G5 25    54 2.664679 11.883115 524.8663 6179.963 -1.15658153
#> 12      40G5 25    54 2.664679 11.883115 524.8663 6179.963 -1.15658153
#> 13      44H1 28    29 7.524208  8.560642 875.0945 6407.166  1.06125291
#> 14      40G4 25    60 3.376058 11.666626 482.9291 6169.297 -0.83191612
#> 15      40G4 25    60 3.376058 11.666626 482.9291 6169.297 -0.83191612
#> 16      40G6 25    40 7.730908  6.096197 608.4566 6175.466  1.15558852
#> 17      38G4 24    43 5.310305  8.318024 496.9863 6074.584  0.05085235
#> 18      40G4 25    31 6.831249 10.660970 459.9947 6172.708  0.74499401
#> 19      39H0 26    44 7.459182  7.222315 856.5823 6163.819  1.03157590
#> 20      40G6 25    61 2.813527 10.754423 574.5343 6174.297 -1.08864911
#>        temp_sc    depth_sc density_cod density_fle        cond
#> 1   0.01853258  0.32469272  1803.01035   46.451333 1014.814815
#> 2   1.79657189 -0.50466319   670.08589  169.075678   12.975000
#> 3   0.23981581 -0.20307923  1221.74913   29.591264   11.523438
#> 4  -0.67676290  0.32469272   981.17104  192.751880   11.384843
#> 5  -0.08642965  0.24929673   603.98199    5.011336    9.523810
#> 6  -0.60494098  0.43778671  1762.30606  112.708677    9.303704
#> 7  -0.69964084  0.51318270   830.73750   29.105067    9.125000
#> 8   1.43378931  0.55088070   867.55963   55.828338    6.250000
#> 9   1.43378931  0.55088070   867.55963   55.828338    6.250000
#> 10  2.20692020 -0.69315318   296.80415  336.644195    4.800000
#> 11  1.52575615  0.32469272   550.56935   31.413118    4.062500
#> 12  1.52575615  0.32469272   550.56935   31.413118    4.062500
#> 13  0.11433520 -0.61775718    77.12135 2724.817167    4.050478
#> 14  1.43378931  0.55088070   867.55963   55.828338    4.000000
#> 15  1.43378931  0.55088070   867.55963   55.828338    4.000000
#> 16 -0.93258677 -0.20307923   575.42186    3.910974    3.703704
#> 17  0.01126845 -0.08998524   532.04545   45.191306    3.703704
#> 18  1.00657635 -0.54236119  1191.06609  332.984641    3.703704
#> 19 -0.45419986 -0.05228724   384.22827  205.259755    3.703704
#> 20  1.04627620  0.58857869    90.53403    7.152002    3.703704

# Drop rows with implausibly high condition values (the apparent recording
# errors identified in the listing above).
dat <- filter(dat, cond < 5)

# Crude condition index against the predicted cod and flounder densities.
ggplot(dat, aes(density_cod, cond)) +
  geom_point(alpha = 0.2) +
  ggtitle("cod")


ggplot(dat, aes(density_fle, cond)) +
  geom_point(alpha = 0.2) +
  ggtitle("fle")


# Now remove the standardized variables, because when I fit the real model, I want to
# standardize them to the prediction grid, not the CPUE data, and I would forget that
# if I don't remove them now.
# Fix: the original select(-oxy_sc, temp_sc, depth_sc) only dropped oxy_sc —
# the last two selections were positive, so temp_sc and depth_sc were kept
# (visible in the saved-data preview further down). All three must be negated.
dat <- dat %>% dplyr::select(-oxy_sc, -temp_sc, -depth_sc)

… And secondly, compare by doing a left_join of the observed densities from the CPUE data onto the condition data, matching on the haul ID (IDx).

# Pull the haul-level observed densities from the CPUE data so they can be
# compared with the model predictions; IDx is the haul identifier.
d_cod_select <- d %>% dplyr::select(density_cod, IDx) %>% rename("density_cod_data" = "density_cod")
d_fle_select <- d %>% dplyr::select(density_fle, IDx) %>% rename("density_fle_data" = "density_fle")

# Join explicitly on the haul ID (IDx is the only shared column anyway).
dat <- left_join(dat, d_cod_select, by = "IDx")
dat <- left_join(dat, d_fle_select, by = "IDx")

# Check how well they correspond
# Fix: the cod panel previously plotted density_fle on the y-axis, comparing
# observed cod density with *predicted flounder* density; it should compare
# observed vs predicted cod density.
ggplot(dat, aes(density_cod_data, density_cod)) + geom_point() + geom_abline(color = "red")

ggplot(dat, aes(density_fle_data, density_fle)) + geom_point() + geom_abline(color = "red")

Add pelagic covariates

# Read data
# BIAS acoustic-survey abundances per ICES statistical rectangle and year.
# A total abundance over ages 1-8+ is computed and a rectangle-year key (IDr)
# is built for joining onto the condition data.
spr <- read_xlsx("data/BIAS/abundances_rectangles_1991-2019.xlsx",
                 sheet = 1) %>%
  rename("StatRec" = "Rec") %>%
  mutate(StatRec = as.factor(StatRec),
         Species = "Sprat",
         abun_spr = `Age 1`+`Age 2`+`Age 3`+`Age 4`+`Age 5`+`Age 6`+`Age 7`+`Age 8+`, # omitting `Age 0` here
         IDr = paste(StatRec, Year, sep = ".")) # Make new ID
  
her <- read_xlsx("data/BIAS/abundances_rectangles_1991-2019.xlsx",
                 sheet = 2) %>%
  as.data.frame() %>%
  rename("StatRec" = "Rect2") %>% # This is not called Rec in the data for some reason
  mutate(StatRec = as.factor(StatRec),
         Species = "Herring",
         abun_her = `Age 1`+`Age 2`+`Age 3`+`Age 4`+`Age 5`+`Age 6`+`Age 7`+`Age 8+`, # omitting `Age 0` here
         IDr = paste(StatRec, Year, sep = ".")) # Make new ID

# Plot the sprat abundance per ICES rectangle over time in the whole area.
# Rectangle midpoint coordinates come from mapplots::ices.rect(); a set of
# rectangles outside the mapped extent is excluded before plotting.
spr %>%
  mutate(lon = ices.rect(StatRec)$lon,
         lat = ices.rect(StatRec)$lat) %>%
  filter(!StatRec %in% c("41G0", "41G1", "41G2", "42G1", "42G2", "43G1", "43G2", "44G0", "44G1")) %>%
  ggplot(aes(lon, lat, fill = log(abun_spr))) +
  geom_raster() +
  scale_fill_viridis() +
  facet_wrap(~Year, ncol = 5) +
  geom_sf(data = world, inherit.aes = FALSE, size = 0.2) +
  coord_sf(xlim = c(xmin, xmax), ylim = c(ymin, ymax)) +
  labs(x = "lon", y = "lat") +
  NULL


ggsave("figures/supp/spr_distribution.png", width = 10, height = 10, dpi = 600)

# Same rectangle-level distribution plot as above, for herring.
her %>%
  mutate(lon = ices.rect(StatRec)$lon,
         lat = ices.rect(StatRec)$lat) %>%
  filter(!StatRec %in% c("41G0", "41G1", "41G2", "42G1", "42G2", "43G1", "43G2", "44G0", "44G1")) %>%
  ggplot(aes(lon, lat, fill = log(abun_her))) +
  geom_raster() +
  scale_fill_viridis() +
  facet_wrap(~Year, ncol = 5) +
  geom_sf(data = world, inherit.aes = FALSE, size = 0.2) +
  coord_sf(xlim = c(xmin, xmax), ylim = c(ymin, ymax)) +
  labs(x = "lon", y = "lat") +
  NULL


ggsave("figures/supp/her_distribution.png", width = 10, height = 10, dpi = 600)

# As can be seen in the above plots, we don't have data for all rectangles in all years
# It is important those rectangles are made NA - not 0 - when merging

# Check distribution of data
# https://www.researchgate.net/publication/47933620_Environmental_factors_and_uncertainty_in_fisheries_management_in_the_northern_Baltic_Sea/figures?lo=1
# Both species cover sub divisions 21-32 (SD 28 appears split as "28_2").
sort(unique(spr$SD))
#>  [1] "21"   "22"   "23"   "24"   "25"   "26"   "27"   "28_2" "29"   "30"  
#> [11] "31"   "32"
sort(unique(her$SD))
#>  [1] "21"   "22"   "23"   "24"   "25"   "26"   "27"   "28_2" "29"   "30"  
#> [11] "31"   "32"

# How many rows are there per rectangle-year ID (IDr)? Ideally exactly one;
# bars at n = 2 reveal duplicated IDr's that need summarizing before joining.
her %>%
  add_count(IDr) %>%
  ggplot(aes(factor(n))) + geom_bar()


spr %>%
  add_count(IDr) %>%
  ggplot(aes(factor(n))) + geom_bar()


# Ok, some ID's with two rows...
# Inspect the duplicated IDr's: the output below shows the same rectangle-year
# appearing once per sub division (SD) it straddles.
spr %>%
  group_by(IDr) %>% 
  mutate(n = n()) %>% 
  filter(n == 2) %>% 
  ungroup() %>% 
  as.data.frame() %>% 
  head(20)
#>    Year SD StatRec Area Age 0  Age 1 Age 2  Age 3  Age 4 Age 5 Age 6 Age 7
#> 1  1991 23    39G2   NA  0.00   0.00  0.00   0.00   0.00  0.00  0.00   0.0
#> 2  1991 24    39G2   NA  0.72   4.54  2.84   1.81   0.44  0.41  0.00   0.0
#> 3  1991 24    39G4   NA  0.00  85.12 96.36  69.69  20.55 15.87  0.70   1.4
#> 4  1991 25    39G4   NA  0.00   0.00  0.00   0.00   0.00  0.00  0.00   0.0
#> 5  1993 21    41G0   NA  0.00   0.01  0.01   0.00   0.00  0.00  0.00   0.0
#> 6  1993 21    41G2   NA  0.00   0.00  0.00   0.00   0.00  0.00  0.00   0.0
#> 7  1993 22    41G0   NA 66.88   6.35  2.87   0.74   0.28  0.00  0.00   0.0
#> 8  1993 23    39G2   NA  0.04   0.29  1.05   1.28   0.23  0.04  0.01   0.0
#> 9  1993 23    41G2   NA  0.00   0.04  0.37   0.47   0.04  0.05  0.00   0.0
#> 10 1993 24    39G2   NA  1.40  10.33 37.19  45.16   8.25  1.24  0.23   0.0
#> 11 1994 21    41G0   NA  1.01   6.79  1.58   0.00   0.00  0.00  0.00   0.0
#> 12 1994 21    41G1   NA 30.94 208.78 48.64   0.00   0.00  0.00  0.00   0.0
#> 13 1994 21    41G2   NA  2.63  12.74  4.24   0.00   0.00  0.00  0.00   0.0
#> 14 1994 22    41G0   NA 11.09   5.60  7.05   0.08   0.02  0.00  0.00   0.0
#> 15 1994 22    41G1   NA  0.61   0.31  0.39   0.00   0.00  0.00  0.00   0.0
#> 16 1994 23    39G2   NA  9.62   0.31  0.00   0.30   0.18  0.10  0.04   0.0
#> 17 1994 23    41G2   NA 42.55   0.88  0.00   0.00   0.00  0.00  0.00   0.0
#> 18 1994 24    39G2   NA 14.21   0.46  0.00   0.44   0.27  0.15  0.06   0.0
#> 19 1994 24    39G4   NA 71.57  12.04 39.94 254.48 222.32 43.48 11.83   0.0
#> 20 1994 25    39G4   NA  1.04   0.00  3.46   7.77   6.57  7.30  3.13   0.0
#>    Age 8+     1+ Species abun_spr       IDr n
#> 1    0.00   0.00   Sprat     0.00 39G2.1991 2
#> 2    0.06  10.10   Sprat    10.10 39G2.1991 2
#> 3    3.43 293.12   Sprat   293.12 39G4.1991 2
#> 4    0.00   0.00   Sprat     0.00 39G4.1991 2
#> 5    0.00   0.02   Sprat     0.02 41G0.1993 2
#> 6    0.00   0.00   Sprat     0.00 41G2.1993 2
#> 7    0.00  10.24   Sprat    10.24 41G0.1993 2
#> 8    0.00   2.90   Sprat     2.90 39G2.1993 2
#> 9    0.00   0.97   Sprat     0.97 41G2.1993 2
#> 10   0.00 102.40   Sprat   102.40 39G2.1993 2
#> 11   0.00   8.37   Sprat     8.37 41G0.1994 2
#> 12   0.00 257.42   Sprat   257.42 41G1.1994 2
#> 13   0.00  16.98   Sprat    16.98 41G2.1994 2
#> 14   0.00  12.75   Sprat    12.75 41G0.1994 2
#> 15   0.00   0.70   Sprat     0.70 41G1.1994 2
#> 16   0.00   0.93   Sprat     0.93 39G2.1994 2
#> 17   0.00   0.88   Sprat     0.88 41G2.1994 2
#> 18   0.00   1.38   Sprat     1.38 39G2.1994 2
#> 19   0.00 584.09   Sprat   584.09 39G4.1994 2
#> 20   1.67  29.90   Sprat    29.90 39G4.1994 2

# Seems to be due to rectangles somehow being in different sub divisions.
# I need to group by IDr and summarize
# First check all rectangles with more than one row have two rows and not more
# (filter(n == 2) and filter(!n == 1) return the same count, so the maximum
# multiplicity is two).
nrow(spr)
#> [1] 2797
nrow(spr %>% group_by(IDr) %>% mutate(n = n()) %>% filter(n == 2))
#> [1] 204
nrow(spr %>% group_by(IDr) %>% mutate(n = n()) %>% filter(!n == 1))
#> [1] 204

# Collapse the sprat table to one row per rectangle-year (IDr), summing the
# abundance of duplicated IDr's, and recover StatRec/Year from the key.
spr_sum <- spr %>%
  group_by(IDr) %>% 
  summarise(abun_spr = sum(abun_spr)) %>% # Sum abundance within IDr
  distinct(IDr, .keep_all = TRUE) %>% # Remove duplicate IDr (a no-op after summarise; kept as a guard)
  mutate(ID_temp = IDr) %>% # Create temporary IDr that we can use to split in order
                            # to get Year and StatRect back into the summarized data
  # Fix: split on the "." separator itself; the previous sep = 4 split after
  # four characters and left the dot in Year (".1991" instead of "1991").
  separate(ID_temp, c("StatRec", "Year"), sep = "\\.")

# Sanity checks: the summarized table should have
# (old rows - duplicated IDr's / 2) rows.
nrow(spr_sum) 
#> [1] 2695
nrow(spr)
#> [1] 2797
nrow(spr %>% group_by(IDr) %>% mutate(n = n()) %>% filter(n == 2))
#> [1] 204

# Spot-check one previously duplicated IDr: abundances are summed correctly.
# NOTE(review): Year shows as ".1991" when separate() splits after four
# characters; harmless downstream since only IDr and abun_spr are used later.
filter(spr_sum, IDr == "39G2.1991")
#> # A tibble: 1 x 4
#>   IDr       abun_spr StatRec Year 
#>   <chr>        <dbl> <chr>   <chr>
#> 1 39G2.1991     10.1 39G2    .1991
filter(spr, IDr == "39G2.1991")
#> # A tibble: 2 x 17
#>    Year SD    StatRec Area  `Age 0` `Age 1` `Age 2` `Age 3` `Age 4` `Age 5`
#>   <dbl> <chr> <fct>   <lgl>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl>
#> 1  1991 23    39G2    NA       0       0       0       0       0       0   
#> 2  1991 24    39G2    NA       0.72    4.54    2.84    1.81    0.44    0.41
#> # … with 7 more variables: `Age 6` <dbl>, `Age 7` <dbl>, `Age 8+` <dbl>,
#> #   `1+` <dbl>, Species <chr>, abun_spr <dbl>, IDr <chr>

# This should equal 1 (new # rows =  old - duplicated IDr)
nrow(spr_sum) / (nrow(spr) - 0.5*nrow(spr %>% group_by(IDr) %>% mutate(n = n()) %>% filter(n == 2)))
#> [1] 1

# How many rows per rectangle?
spr_sum %>%
  group_by(IDr) %>% 
  mutate(n = n()) %>% 
  ungroup() %>% 
  distinct(n)
#> # A tibble: 1 x 1
#>       n
#>   <int>
#> 1     1
  
# Now do the same for herring
# Same duplicate check as for sprat: at most two rows per IDr.
nrow(her)
#> [1] 2797
nrow(her %>% group_by(IDr) %>% mutate(n = n()) %>% filter(n == 2))
#> [1] 204
nrow(her %>% group_by(IDr) %>% mutate(n = n()) %>% filter(!n == 1))
#> [1] 204

# Collapse the herring table to one row per rectangle-year (IDr), summing the
# abundance of duplicated IDr's, and recover StatRec/Year from the key.
her_sum <- her %>%
  group_by(IDr) %>% 
  summarise(abun_her = sum(abun_her)) %>% # Sum abundance within IDr
  distinct(IDr, .keep_all = TRUE) %>% # Remove duplicate IDr (a no-op after summarise; kept as a guard)
  mutate(ID_temp = IDr) %>% # Create temporary IDr that we can use to split in order
  # to get Year and StatRect back into the summarized data
  # Fix: split on the "." separator itself; the previous sep = 4 split after
  # four characters and left the dot in Year (".1991" instead of "1991").
  separate(ID_temp, c("StatRec", "Year"), sep = "\\.")

# Sanity checks: same row-count arithmetic as for sprat.
nrow(her_sum) 
#> [1] 2695
nrow(her)
#> [1] 2797
nrow(her %>% group_by(IDr) %>% mutate(n = n()) %>% filter(n == 2))
#> [1] 204

# Spot-check one previously duplicated IDr: 11.04 + 188.53 ≈ 200.
filter(her_sum, IDr == "39G2.1991")
#> # A tibble: 1 x 4
#>   IDr       abun_her StatRec Year 
#>   <chr>        <dbl> <chr>   <chr>
#> 1 39G2.1991     200. 39G2    .1991
filter(her, IDr == "39G2.1991")
#>   Year SD StatRec Area  Age 0 Age 1 Age 2 Age 3 Age 4 Age 5 Age 6 Age 7 Age 8+
#> 1 1991 23    39G2   NA  43.06  2.12  3.03  3.49  1.43  0.73  0.19  0.05   0.00
#> 2 1991 24    39G2   NA 122.19 27.29 34.37 70.16 31.02 12.15  8.29  0.54   4.71
#>       1+ Species abun_her       IDr
#> 1  11.04 Herring    11.04 39G2.1991
#> 2 188.53 Herring   188.53 39G2.1991

# This should equal 1 (new # rows =  old - duplicated IDr)
nrow(her_sum) / (nrow(her) - 0.5*nrow(her %>% group_by(IDr) %>% mutate(n = n()) %>% filter(n == 2)))
#> [1] 1

# How many rows per rectangle?
her_sum %>%
  group_by(IDr) %>% 
  mutate(n = n()) %>% 
  ungroup() %>% 
  distinct(n)
#> # A tibble: 1 x 1
#>       n
#>   <int>
#> 1     1

# Now join the pelagic covariates onto the condition data: make the rectangle
# a factor (matching the pelagics tables) and build the same rectangle-year
# key (IDr) in a single mutate.
dat <- dat %>%
  mutate(ices_rect = as.factor(ices_rect),
         IDr = paste(ices_rect, year, sep = "."))

# Are there any StatRec that are in the condition data that are not in the pelagics data?
# factor(0) below = none; every rectangle in the condition data occurs in the
# pelagics data.
dat$ices_rect[!dat$ices_rect %in% her$StatRec]
#> factor(0)
#> 51 Levels: 37G2 37G3 37G4 37G5 37G6 37G8 37G9 38G2 38G3 38G4 38G5 38G6 ... 45H1
dat$ices_rect[!dat$ices_rect %in% spr$StatRec]
#> factor(0)
#> 51 Levels: 37G2 37G3 37G4 37G5 37G6 37G8 37G9 38G2 38G3 38G4 38G5 38G6 ... 45H1

# No, but not all IDr's are present
# (some rectangle-*year* combinations are missing from the pelagics data).
head(dat$IDr[!dat$IDr %in% her$IDr])
#> [1] "37G4.1993" "37G4.1993" "37G4.1993" "37G4.1993" "40G4.1993" "40G4.1993"
head(dat$IDr[!dat$IDr %in% spr$IDr])
#> [1] "37G4.1993" "37G4.1993" "37G4.1993" "37G4.1993" "40G4.1993" "40G4.1993"

# Keep only the pelagic rows whose rectangle-year key occurs in the condition
# data (the rest would never match in the join), then reduce each table to
# the join key plus its abundance column.
spr_sum <- filter(spr_sum, IDr %in% dat$IDr)
her_sum <- filter(her_sum, IDr %in% dat$IDr)

spr_sub <- dplyr::select(spr_sum, IDr, abun_spr)
her_sub <- dplyr::select(her_sum, IDr, abun_her)

# Now join dat and sprat data
# left_join with no `by` matches on the shared IDr column; rectangle-years
# missing from the pelagics data become NA (not 0), which is the intended
# behavior (see the note after the distribution plots above).
dat <- left_join(dat, spr_sub)
nrow(dat)
#> [1] 97435

# And herring..
dat <- left_join(dat, her_sub)

# Now deal with the NA's
# The source tables themselves contain no NA abundances...
unique(is.na(spr_sum$abun_spr))
#> [1] FALSE
unique(is.na(her_sum$abun_her))
#> [1] FALSE

# ...so any NA in dat comes from an unmatched IDr (rectangle-year missing
# from the pelagics data), not from the pelagics values themselves.
unique(is.na(dat$abun_spr))
#> [1] FALSE  TRUE
unique(is.na(dat$abun_her))
#> [1] FALSE  TRUE

# True zeros do exist in the matched abundances (see below), which confirms
# that the NA's are missing data rather than zero abundance.
dat %>% drop_na(abun_spr) %>% arrange(abun_spr) %>% dplyr::select(abun_spr)
#> # A tibble: 94,533 x 1
#>    abun_spr
#>       <dbl>
#>  1        0
#>  2        0
#>  3        0
#>  4        0
#>  5        0
#>  6        0
#>  7        0
#>  8        0
#>  9        0
#> 10        0
#> # … with 94,523 more rows
dat %>% drop_na(abun_her) %>% arrange(abun_her) %>% dplyr::select(abun_her)
#> # A tibble: 94,533 x 1
#>    abun_her
#>       <dbl>
#>  1        0
#>  2        0
#>  3        0
#>  4        0
#>  5        0
#>  6        0
#>  7        0
#>  8        0
#>  9        0
#> 10        0
#> # … with 94,523 more rows

# The NA's I have in the DAT are missing pelagic data, i.e. not 0's! Need to drop them unfortunately,
# or fit a model to the data. Since it's only 3% of data, I will simply remove them.
dat <- dat %>% drop_na(abun_spr) %>% drop_na(abun_her)

Save data

# Final preview of the assembled data before trimming and saving.
dat %>% data.frame() %>% head()
#>   weight_g length_cm year  lat     lon quarter                       IDx
#> 1     1176        51 1993 54.7 14.1333       4 1993.4.GFR.06S1.H20.28.41
#> 2     1187        51 1993 54.7 14.1333       4 1993.4.GFR.06S1.H20.28.41
#> 3     1199        51 1993 54.7 14.1333       4 1993.4.GFR.06S1.H20.28.41
#> 4     1219        51 1993 54.7 14.1333       4 1993.4.GFR.06S1.H20.28.41
#> 5     1433        53 1993 54.7 14.1333       4 1993.4.GFR.06S1.H20.28.41
#> 6     1824        55 1993 54.7 14.1333       4 1993.4.GFR.06S1.H20.28.41
#>   ices_rect SD depth      oxy     temp        X        Y    temp_sc  depth_sc
#> 1      38G4 24    18 7.320176 7.858522 444.1463 6061.753 -0.1839327 -1.032435
#> 2      38G4 24    18 7.320176 7.858522 444.1463 6061.753 -0.1839327 -1.032435
#> 3      38G4 24    18 7.320176 7.858522 444.1463 6061.753 -0.1839327 -1.032435
#> 4      38G4 24    18 7.320176 7.858522 444.1463 6061.753 -0.1839327 -1.032435
#> 5      38G4 24    18 7.320176 7.858522 444.1463 6061.753 -0.1839327 -1.032435
#> 6      38G4 24    18 7.320176 7.858522 444.1463 6061.753 -0.1839327 -1.032435
#>   density_cod density_fle      cond density_cod_data density_fle_data       IDr
#> 1    690.7729    187.6693 0.8865369         698.3562         308.4765 38G4.1993
#> 2    690.7729    187.6693 0.8948293         698.3562         308.4765 38G4.1993
#> 3    690.7729    187.6693 0.9038756         698.3562         308.4765 38G4.1993
#> 4    690.7729    187.6693 0.9189527         698.3562         308.4765 38G4.1993
#> 5    690.7729    187.6693 0.9625395         698.3562         308.4765 38G4.1993
#> 6    690.7729    187.6693 1.0963186         698.3562         308.4765 38G4.1993
#>   abun_spr abun_her
#> 1   122.39   331.09
#> 2   122.39   331.09
#> 3   122.39   331.09
#> 4   122.39   331.09
#> 5   122.39   331.09
#> 6   122.39   331.09

# Trim data a little bit to not make it unnecessarily large
# NOTE(review): mixing one negative selection with positive ones drops only
# IDx — quarter, cond and IDr are all kept. If the intent was to drop all
# four, each needs its own minus sign (the *_sc select earlier in this script
# has the same pattern). TODO confirm intent before changing, since cond may
# be wanted downstream.
dat <- dat %>% dplyr::select(-IDx, quarter, cond, IDr)

write.csv(dat, file = "data/for_analysis/mdat_cond.csv", row.names = FALSE)